// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/arm/simulator-arm.h"

#if defined(USE_SIMULATOR)

#include <stdarg.h>
#include <stdlib.h>
#include <cmath>

#include "src/arm/constants-arm.h"
#include "src/assembler-inl.h"
#include "src/base/bits.h"
#include "src/base/lazy-instance.h"
#include "src/disasm.h"
#include "src/macro-assembler.h"
#include "src/objects-inl.h"
#include "src/ostreams.h"
#include "src/runtime/runtime-utils.h"
#include "src/utils.h"
#include "src/vector.h"

// Only build the simulator if not compiling for real ARM hardware.
namespace v8 {
namespace internal {

    // Defines Simulator::GlobalMonitor::Get(), the accessor this file uses to
    // reach the GlobalMonitor instance (for example from ~Simulator()). Going by
    // the macro name, the instance is presumably created lazily on first use and
    // intentionally never destroyed -- confirm against the macro definition.
    DEFINE_LAZY_LEAKY_OBJECT_GETTER(Simulator::GlobalMonitor,
        Simulator::GlobalMonitor::Get)

// This macro provides a platform independent use of sscanf. The reason for
// SScanF not being implemented in a platform independent way through
// ::v8::internal::OS in the same way as SNPrintF is that the
// Windows C Run-Time Library does not provide vsscanf.
#define SScanF sscanf // NOLINT

    // The ArmDebugger class is used by the simulator while debugging simulated ARM
    // code. It wraps a Simulator and implements an interactive command shell
    // (see Debug()) that can inspect registers and memory, disassemble code,
    // single-step, and manage one breakpoint as well as "stop" instructions.
    class ArmDebugger {
    public:
        // Creates a debugger operating on |sim|. The pointer is only stored.
        explicit ArmDebugger(Simulator* sim)
            : sim_(sim)
        {
        }

        // Reports the stop instruction |instr| (printing its stop code unless it
        // is the default code) and then enters the debugger shell.
        void Stop(Instruction* instr);
        // Runs the interactive "sim> " command loop until the user continues,
        // input ends, or the simulator reports a bad pc.
        void Debug();

    private:
        // Instruction bits written over the breakpoint address by
        // RedoBreakpoints().
        static const Instr kBreakpointInstr = (al | (7 * B25) | (1 * B24) | kBreakpoint);
        // Instruction bits used by the "stop unstop" command to overwrite a stop
        // instruction.
        static const Instr kNopInstr = (al | (13 * B21));

        // The simulator being debugged.
        Simulator* sim_;

        // Returns the value of core register |regnum|; the pc is read through
        // Simulator::get_pc(), every other register through get_register().
        int32_t GetRegisterValue(int regnum);
        // Returns the double stored in the core register pair starting at
        // |regnum|.
        double GetRegisterPairDoubleValue(int regnum);
        // Returns the scalar value of VFP double register |regnum|.
        double GetVFPDoubleRegisterValue(int regnum);
        // Resolves |desc| to a 32-bit value. |desc| may be a core register name,
        // a hexadecimal literal prefixed with "0x", or an unsigned decimal
        // number. Returns true and stores the result in |*value| on success.
        bool GetValue(const char* desc, int32_t* value);
        // Reads the VFP single-precision register named by |desc| into |*value|.
        // Returns false if |desc| does not name a single-precision register.
        bool GetVFPSingleValue(const char* desc, float* value);
        // Reads the VFP double-precision register named by |desc| into |*value|.
        // Returns false if |desc| does not name a double-precision register.
        bool GetVFPDoubleValue(const char* desc, double* value);

        // Set or delete a breakpoint. Returns true if successful.
        // Only a single breakpoint is supported: SetBreakpoint fails while one
        // is already set, and DeleteBreakpoint removes the current breakpoint
        // regardless of its argument.
        bool SetBreakpoint(Instruction* breakpc);
        bool DeleteBreakpoint(Instruction* breakpc);

        // Undo and redo all breakpoints. This is needed to bracket disassembly and
        // execution to skip past breakpoints when run from the debugger.
        void UndoBreakpoints();
        void RedoBreakpoints();
    };

    // Called when the simulator hits a stop instruction: reports the stop and
    // then hands control to the interactive debugger shell.
    void ArmDebugger::Stop(Instruction* instr)
    {
        // The stop code is the svc value masked with kStopCodeMask.
        const uint32_t stop_code = instr->SvcValue() & kStopCodeMask;
        const bool is_default_code = (stop_code == kMaxStopCode);
        if (is_default_code) {
            // The default code is not worth printing.
            PrintF("Simulator hit\n");
        } else {
            PrintF("Simulator hit stop %u\n", stop_code);
        }
        Debug();
    }

    // Returns the current value of core register |regnum|. The pc is fetched
    // through get_pc() instead of get_register().
    int32_t ArmDebugger::GetRegisterValue(int regnum)
    {
        return (regnum == kPCRegister) ? sim_->get_pc()
                                       : sim_->get_register(regnum);
    }

    // Returns the double held in the core register pair that starts at |regnum|.
    double ArmDebugger::GetRegisterPairDoubleValue(int regnum)
    {
        const double pair_value = sim_->get_double_from_register_pair(regnum);
        return pair_value;
    }

    // Returns the scalar value stored in VFP double register |regnum|.
    double ArmDebugger::GetVFPDoubleRegisterValue(int regnum)
    {
        auto boxed = sim_->get_double_from_d_register(regnum);
        return boxed.get_scalar();
    }

    // Resolves |desc| to a 32-bit value and stores it in |*value|.
    // |desc| is tried, in this order, as a core register name, as a hexadecimal
    // literal with a "0x" prefix, and as an unsigned decimal number.
    // Returns true if one of these interpretations succeeded.
    bool ArmDebugger::GetValue(const char* desc, int32_t* value)
    {
        // A register name wins over any numeric interpretation.
        const int reg_code = Registers::Number(desc);
        if (reg_code != kNoRegister) {
            *value = GetRegisterValue(reg_code);
            return true;
        }

        // Numbers are scanned as unsigned and stored into the caller's int32_t.
        uint32_t* const out = reinterpret_cast<uint32_t*>(value);
        const bool has_hex_prefix = (strncmp(desc, "0x", 2) == 0);
        const int fields_parsed = has_hex_prefix ? SScanF(desc + 2, "%x", out)
                                                 : SScanF(desc, "%u", out);
        return fields_parsed == 1;
    }

    // Reads the single-precision VFP register named by |desc| into |*value|.
    // Returns false (leaving |*value| untouched) when |desc| is not the name of
    // a single-precision register.
    bool ArmDebugger::GetVFPSingleValue(const char* desc, float* value)
    {
        bool is_double;
        const int reg_code = VFPRegisters::Number(desc, &is_double);
        if (reg_code == kNoRegister || is_double) {
            return false;
        }
        *value = sim_->get_float_from_s_register(reg_code).get_scalar();
        return true;
    }

    // Reads the double-precision VFP register named by |desc| into |*value|.
    // Returns false (leaving |*value| untouched) when |desc| is not the name of
    // a double-precision register.
    bool ArmDebugger::GetVFPDoubleValue(const char* desc, double* value)
    {
        bool is_double;
        const int reg_code = VFPRegisters::Number(desc, &is_double);
        if (reg_code == kNoRegister || !is_double) {
            return false;
        }
        *value = sim_->get_double_from_d_register(reg_code).get_scalar();
        return true;
    }

    // Records a breakpoint at |breakpc|. Only one breakpoint can exist at a
    // time; returns false without side effects if one is already set.
    bool ArmDebugger::SetBreakpoint(Instruction* breakpc)
    {
        const bool slot_in_use = (sim_->break_pc_ != nullptr);
        if (slot_in_use) {
            return false;
        }

        // Remember the address and the instruction that lives there. The code
        // itself is not patched here; the breakpoint instruction is written when
        // the debugger shell continues.
        sim_->break_instr_ = breakpc->InstructionBits();
        sim_->break_pc_ = breakpc;
        return true;
    }

    // Removes the current breakpoint, if any, restoring the original
    // instruction at its address. |breakpc| is not consulted. Always returns
    // true.
    bool ArmDebugger::DeleteBreakpoint(Instruction* breakpc)
    {
        Instruction* const active = sim_->break_pc_;
        if (active != nullptr) {
            active->SetInstructionBits(sim_->break_instr_);
        }

        // Forget the breakpoint.
        sim_->break_instr_ = 0;
        sim_->break_pc_ = nullptr;
        return true;
    }

    void ArmDebugger::UndoBreakpoints()
    {
        if (sim_->break_pc_ != nullptr) {
            sim_->break_pc_->SetInstructionBits(sim_->break_instr_);
        }
    }

    void ArmDebugger::RedoBreakpoints()
    {
        if (sim_->break_pc_ != nullptr) {
            sim_->break_pc_->SetInstructionBits(kBreakpointInstr);
        }
    }

    void ArmDebugger::Debug()
    {
        intptr_t last_pc = -1;
        bool done = false;

#define COMMAND_SIZE 63
#define ARG_SIZE 255

#define STR(a) #a
#define XSTR(a) STR(a)

        char cmd[COMMAND_SIZE + 1];
        char arg1[ARG_SIZE + 1];
        char arg2[ARG_SIZE + 1];
        char* argv[3] = { cmd, arg1, arg2 };

        // make sure to have a proper terminating character if reaching the limit
        cmd[COMMAND_SIZE] = 0;
        arg1[ARG_SIZE] = 0;
        arg2[ARG_SIZE] = 0;

        // Undo all set breakpoints while running in the debugger shell. This will
        // make them invisible to all commands.
        UndoBreakpoints();

        while (!done && !sim_->has_bad_pc()) {
            if (last_pc != sim_->get_pc()) {
                disasm::NameConverter converter;
                disasm::Disassembler dasm(converter);
                // use a reasonably large buffer
                v8::internal::EmbeddedVector<char, 256> buffer;
                dasm.InstructionDecode(buffer,
                    reinterpret_cast<byte*>(sim_->get_pc()));
                PrintF("  0x%08x  %s\n", sim_->get_pc(), buffer.start());
                last_pc = sim_->get_pc();
            }
            char* line = ReadLine("sim> ");
            if (line == nullptr) {
                break;
            } else {
                char* last_input = sim_->last_debugger_input();
                if (strcmp(line, "\n") == 0 && last_input != nullptr) {
                    line = last_input;
                } else {
                    // Ownership is transferred to sim_;
                    sim_->set_last_debugger_input(line);
                }
                // Use sscanf to parse the individual parts of the command line. At the
                // moment no command expects more than two parameters.
                int argc = SScanF(line,
                    "%" XSTR(COMMAND_SIZE) "s "
                                           "%" XSTR(ARG_SIZE) "s "
                                                              "%" XSTR(ARG_SIZE) "s",
                    cmd, arg1, arg2);
                if ((strcmp(cmd, "si") == 0) || (strcmp(cmd, "stepi") == 0)) {
                    sim_->InstructionDecode(reinterpret_cast<Instruction*>(sim_->get_pc()));
                } else if ((strcmp(cmd, "c") == 0) || (strcmp(cmd, "cont") == 0)) {
                    // Execute the one instruction we broke at with breakpoints disabled.
                    sim_->InstructionDecode(reinterpret_cast<Instruction*>(sim_->get_pc()));
                    // Leave the debugger shell.
                    done = true;
                } else if ((strcmp(cmd, "p") == 0) || (strcmp(cmd, "print") == 0)) {
                    if (argc == 2 || (argc == 3 && strcmp(arg2, "fp") == 0)) {
                        int32_t value;
                        float svalue;
                        double dvalue;
                        if (strcmp(arg1, "all") == 0) {
                            for (int i = 0; i < kNumRegisters; i++) {
                                value = GetRegisterValue(i);
                                PrintF("%3s: 0x%08x %10d", RegisterName(Register::from_code(i)),
                                    value, value);
                                if ((argc == 3 && strcmp(arg2, "fp") == 0) && i < 8 && (i % 2) == 0) {
                                    dvalue = GetRegisterPairDoubleValue(i);
                                    PrintF(" (%f)\n", dvalue);
                                } else {
                                    PrintF("\n");
                                }
                            }
                            for (int i = 0; i < DwVfpRegister::NumRegisters(); i++) {
                                dvalue = GetVFPDoubleRegisterValue(i);
                                uint64_t as_words = bit_cast<uint64_t>(dvalue);
                                PrintF("%3s: %f 0x%08x %08x\n", VFPRegisters::Name(i, true),
                                    dvalue, static_cast<uint32_t>(as_words >> 32),
                                    static_cast<uint32_t>(as_words & 0xFFFFFFFF));
                            }
                        } else {
                            if (GetValue(arg1, &value)) {
                                PrintF("%s: 0x%08x %d \n", arg1, value, value);
                            } else if (GetVFPSingleValue(arg1, &svalue)) {
                                uint32_t as_word = bit_cast<uint32_t>(svalue);
                                PrintF("%s: %f 0x%08x\n", arg1, svalue, as_word);
                            } else if (GetVFPDoubleValue(arg1, &dvalue)) {
                                uint64_t as_words = bit_cast<uint64_t>(dvalue);
                                PrintF("%s: %f 0x%08x %08x\n", arg1, dvalue,
                                    static_cast<uint32_t>(as_words >> 32),
                                    static_cast<uint32_t>(as_words & 0xFFFFFFFF));
                            } else {
                                PrintF("%s unrecognized\n", arg1);
                            }
                        }
                    } else {
                        PrintF("print <register>\n");
                    }
                } else if ((strcmp(cmd, "po") == 0)
                    || (strcmp(cmd, "printobject") == 0)) {
                    if (argc == 2) {
                        int32_t value;
                        StdoutStream os;
                        if (GetValue(arg1, &value)) {
                            Object obj(value);
                            os << arg1 << ": \n";
#ifdef DEBUG
                            obj->Print(os);
                            os << "\n";
#else
                            os << Brief(obj) << "\n";
#endif
                        } else {
                            os << arg1 << " unrecognized\n";
                        }
                    } else {
                        PrintF("printobject <value>\n");
                    }
                } else if (strcmp(cmd, "stack") == 0 || strcmp(cmd, "mem") == 0) {
                    int32_t* cur = nullptr;
                    int32_t* end = nullptr;
                    int next_arg = 1;

                    if (strcmp(cmd, "stack") == 0) {
                        cur = reinterpret_cast<int32_t*>(sim_->get_register(Simulator::sp));
                    } else { // "mem"
                        int32_t value;
                        if (!GetValue(arg1, &value)) {
                            PrintF("%s unrecognized\n", arg1);
                            continue;
                        }
                        cur = reinterpret_cast<int32_t*>(value);
                        next_arg++;
                    }

                    int32_t words;
                    if (argc == next_arg) {
                        words = 10;
                    } else {
                        if (!GetValue(argv[next_arg], &words)) {
                            words = 10;
                        }
                    }
                    end = cur + words;

                    while (cur < end) {
                        PrintF("  0x%08" V8PRIxPTR ":  0x%08x %10d",
                            reinterpret_cast<intptr_t>(cur), *cur, *cur);
                        Object obj(*cur);
                        Heap* current_heap = sim_->isolate_->heap();
                        if (obj.IsSmi() || current_heap->Contains(HeapObject::cast(obj))) {
                            PrintF(" (");
                            if (obj.IsSmi()) {
                                PrintF("smi %d", Smi::ToInt(obj));
                            } else {
                                obj->ShortPrint();
                            }
                            PrintF(")");
                        }
                        PrintF("\n");
                        cur++;
                    }
                } else if (strcmp(cmd, "disasm") == 0 || strcmp(cmd, "di") == 0) {
                    disasm::NameConverter converter;
                    disasm::Disassembler dasm(converter);
                    // use a reasonably large buffer
                    v8::internal::EmbeddedVector<char, 256> buffer;

                    byte* prev = nullptr;
                    byte* cur = nullptr;
                    byte* end = nullptr;

                    if (argc == 1) {
                        cur = reinterpret_cast<byte*>(sim_->get_pc());
                        end = cur + (10 * kInstrSize);
                    } else if (argc == 2) {
                        int regnum = Registers::Number(arg1);
                        if (regnum != kNoRegister || strncmp(arg1, "0x", 2) == 0) {
                            // The argument is an address or a register name.
                            int32_t value;
                            if (GetValue(arg1, &value)) {
                                cur = reinterpret_cast<byte*>(value);
                                // Disassemble 10 instructions at <arg1>.
                                end = cur + (10 * kInstrSize);
                            }
                        } else {
                            // The argument is the number of instructions.
                            int32_t value;
                            if (GetValue(arg1, &value)) {
                                cur = reinterpret_cast<byte*>(sim_->get_pc());
                                // Disassemble <arg1> instructions.
                                end = cur + (value * kInstrSize);
                            }
                        }
                    } else {
                        int32_t value1;
                        int32_t value2;
                        if (GetValue(arg1, &value1) && GetValue(arg2, &value2)) {
                            cur = reinterpret_cast<byte*>(value1);
                            end = cur + (value2 * kInstrSize);
                        }
                    }

                    while (cur < end) {
                        prev = cur;
                        cur += dasm.InstructionDecode(buffer, cur);
                        PrintF("  0x%08" V8PRIxPTR "  %s\n", reinterpret_cast<intptr_t>(prev),
                            buffer.start());
                    }
                } else if (strcmp(cmd, "gdb") == 0) {
                    PrintF("relinquishing control to gdb\n");
                    v8::base::OS::DebugBreak();
                    PrintF("regaining control from gdb\n");
                } else if (strcmp(cmd, "break") == 0) {
                    if (argc == 2) {
                        int32_t value;
                        if (GetValue(arg1, &value)) {
                            if (!SetBreakpoint(reinterpret_cast<Instruction*>(value))) {
                                PrintF("setting breakpoint failed\n");
                            }
                        } else {
                            PrintF("%s unrecognized\n", arg1);
                        }
                    } else {
                        PrintF("break <address>\n");
                    }
                } else if (strcmp(cmd, "del") == 0) {
                    if (!DeleteBreakpoint(nullptr)) {
                        PrintF("deleting breakpoint failed\n");
                    }
                } else if (strcmp(cmd, "flags") == 0) {
                    PrintF("N flag: %d; ", sim_->n_flag_);
                    PrintF("Z flag: %d; ", sim_->z_flag_);
                    PrintF("C flag: %d; ", sim_->c_flag_);
                    PrintF("V flag: %d\n", sim_->v_flag_);
                    PrintF("INVALID OP flag: %d; ", sim_->inv_op_vfp_flag_);
                    PrintF("DIV BY ZERO flag: %d; ", sim_->div_zero_vfp_flag_);
                    PrintF("OVERFLOW flag: %d; ", sim_->overflow_vfp_flag_);
                    PrintF("UNDERFLOW flag: %d; ", sim_->underflow_vfp_flag_);
                    PrintF("INEXACT flag: %d;\n", sim_->inexact_vfp_flag_);
                } else if (strcmp(cmd, "stop") == 0) {
                    int32_t value;
                    intptr_t stop_pc = sim_->get_pc() - kInstrSize;
                    Instruction* stop_instr = reinterpret_cast<Instruction*>(stop_pc);
                    if ((argc == 2) && (strcmp(arg1, "unstop") == 0)) {
                        // Remove the current stop.
                        if (sim_->isStopInstruction(stop_instr)) {
                            stop_instr->SetInstructionBits(kNopInstr);
                        } else {
                            PrintF("Not at debugger stop.\n");
                        }
                    } else if (argc == 3) {
                        // Print information about all/the specified breakpoint(s).
                        if (strcmp(arg1, "info") == 0) {
                            if (strcmp(arg2, "all") == 0) {
                                PrintF("Stop information:\n");
                                for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
                                    sim_->PrintStopInfo(i);
                                }
                            } else if (GetValue(arg2, &value)) {
                                sim_->PrintStopInfo(value);
                            } else {
                                PrintF("Unrecognized argument.\n");
                            }
                        } else if (strcmp(arg1, "enable") == 0) {
                            // Enable all/the specified breakpoint(s).
                            if (strcmp(arg2, "all") == 0) {
                                for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
                                    sim_->EnableStop(i);
                                }
                            } else if (GetValue(arg2, &value)) {
                                sim_->EnableStop(value);
                            } else {
                                PrintF("Unrecognized argument.\n");
                            }
                        } else if (strcmp(arg1, "disable") == 0) {
                            // Disable all/the specified breakpoint(s).
                            if (strcmp(arg2, "all") == 0) {
                                for (uint32_t i = 0; i < sim_->kNumOfWatchedStops; i++) {
                                    sim_->DisableStop(i);
                                }
                            } else if (GetValue(arg2, &value)) {
                                sim_->DisableStop(value);
                            } else {
                                PrintF("Unrecognized argument.\n");
                            }
                        }
                    } else {
                        PrintF("Wrong usage. Use help command for more information.\n");
                    }
                } else if ((strcmp(cmd, "t") == 0) || strcmp(cmd, "trace") == 0) {
                    ::v8::internal::FLAG_trace_sim = !::v8::internal::FLAG_trace_sim;
                    PrintF("Trace of executed instructions is %s\n",
                        ::v8::internal::FLAG_trace_sim ? "on" : "off");
                } else if ((strcmp(cmd, "h") == 0) || (strcmp(cmd, "help") == 0)) {
                    PrintF("cont\n");
                    PrintF("  continue execution (alias 'c')\n");
                    PrintF("stepi\n");
                    PrintF("  step one instruction (alias 'si')\n");
                    PrintF("print <register>\n");
                    PrintF("  print register content (alias 'p')\n");
                    PrintF("  use register name 'all' to print all registers\n");
                    PrintF("  add argument 'fp' to print register pair double values\n");
                    PrintF("printobject <register>\n");
                    PrintF("  print an object from a register (alias 'po')\n");
                    PrintF("flags\n");
                    PrintF("  print flags\n");
                    PrintF("stack [<words>]\n");
                    PrintF("  dump stack content, default dump 10 words)\n");
                    PrintF("mem <address> [<words>]\n");
                    PrintF("  dump memory content, default dump 10 words)\n");
                    PrintF("disasm [<instructions>]\n");
                    PrintF("disasm [<address/register>]\n");
                    PrintF("disasm [[<address/register>] <instructions>]\n");
                    PrintF("  disassemble code, default is 10 instructions\n");
                    PrintF("  from pc (alias 'di')\n");
                    PrintF("gdb\n");
                    PrintF("  enter gdb\n");
                    PrintF("break <address>\n");
                    PrintF("  set a break point on the address\n");
                    PrintF("del\n");
                    PrintF("  delete the breakpoint\n");
                    PrintF("trace (alias 't')\n");
                    PrintF("  toogle the tracing of all executed statements\n");
                    PrintF("stop feature:\n");
                    PrintF("  Description:\n");
                    PrintF("    Stops are debug instructions inserted by\n");
                    PrintF("    the Assembler::stop() function.\n");
                    PrintF("    When hitting a stop, the Simulator will\n");
                    PrintF("    stop and give control to the ArmDebugger.\n");
                    PrintF("    The first %d stop codes are watched:\n",
                        Simulator::kNumOfWatchedStops);
                    PrintF("    - They can be enabled / disabled: the Simulator\n");
                    PrintF("      will / won't stop when hitting them.\n");
                    PrintF("    - The Simulator keeps track of how many times they \n");
                    PrintF("      are met. (See the info command.) Going over a\n");
                    PrintF("      disabled stop still increases its counter. \n");
                    PrintF("  Commands:\n");
                    PrintF("    stop info all/<code> : print infos about number <code>\n");
                    PrintF("      or all stop(s).\n");
                    PrintF("    stop enable/disable all/<code> : enables / disables\n");
                    PrintF("      all or number <code> stop(s)\n");
                    PrintF("    stop unstop\n");
                    PrintF("      ignore the stop instruction at the current location\n");
                    PrintF("      from now on\n");
                } else {
                    PrintF("Unknown command: %s\n", cmd);
                }
            }
        }

        // Add all the breakpoints back to stop execution and enter the debugger
        // shell when hit.
        RedoBreakpoints();

#undef COMMAND_SIZE
#undef ARG_SIZE

#undef STR
#undef XSTR
    }

    // Key comparison for the simulated i-cache map: two keys match when they
    // are the same address. Debug builds verify that both keys have no bits set
    // under CachePage::kPageMask.
    bool Simulator::ICacheMatch(void* one, void* two)
    {
        DCHECK_EQ(reinterpret_cast<intptr_t>(one) & CachePage::kPageMask, 0);
        DCHECK_EQ(reinterpret_cast<intptr_t>(two) & CachePage::kPageMask, 0);
        const bool same_key = (one == two);
        return same_key;
    }

    // Hash for i-cache map keys: the address truncated to 32 bits and shifted
    // right by two.
    static uint32_t ICacheHash(void* key)
    {
        const uintptr_t address = reinterpret_cast<uintptr_t>(key);
        const uint32_t low_word = static_cast<uint32_t>(address);
        return low_word >> 2;
    }

    // Returns true when |start| and |start + size| fall on the same cache page,
    // i.e. agree in all bits outside CachePage::kPageMask.
    static bool AllOnOnePage(uintptr_t start, int size)
    {
        const intptr_t first_page = (start & ~CachePage::kPageMask);
        const intptr_t last_page = ((start + size) & ~CachePage::kPageMask);
        const bool same_page = (first_page == last_page);
        return same_page;
    }

    // Takes ownership of |input| as the remembered debugger command line and
    // releases the previously remembered one.
    void Simulator::set_last_debugger_input(char* input)
    {
        char* const previous = last_debugger_input_;
        last_debugger_input_ = input;
        DeleteArray(previous);
    }

    // Overwrites |instruction| with the bit pattern
    // al | (0xF * B24) | kCallRtRedirected.
    void Simulator::SetRedirectInstruction(Instruction* instruction)
    {
        const Instr redirect_bits = al | (0xF * B24) | kCallRtRedirected;
        instruction->SetInstructionBits(redirect_bits);
    }

    // Invalidates the simulated i-cache lines covering
    // [start_addr, start_addr + size).
    //
    // The range is first widened to whole cache lines and then flushed one
    // cache page at a time, because FlushOnePage only handles ranges that stay
    // within a single page.
    void Simulator::FlushICache(base::CustomMatcherHashMap* i_cache,
        void* start_addr, size_t size)
    {
        // An empty range has nothing to invalidate. Without this guard the
        // rounding below evaluates `size - 1` on an unsigned zero; for a
        // line-aligned start that leaves size at 0, and the page loop then sees
        // a range "ending" before its start and keeps flushing pages with a
        // wrapped-around size.
        if (size == 0) {
            return;
        }
        intptr_t start = reinterpret_cast<intptr_t>(start_addr);
        // Move start down to the beginning of its cache line and grow size by
        // the same amount so that the end of the range stays put.
        int intra_line = (start & CachePage::kLineMask);
        start -= intra_line;
        size += intra_line;
        // Round size up to a whole number of cache lines.
        size = ((size - 1) | CachePage::kLineMask) + 1;
        int offset = (start & CachePage::kPageMask);
        while (!AllOnOnePage(start, size - 1)) {
            // Flush up to the end of the current page, then continue at the
            // start of the next one.
            int bytes_to_flush = CachePage::kPageSize - offset;
            FlushOnePage(i_cache, start, bytes_to_flush);
            start += bytes_to_flush;
            size -= bytes_to_flush;
            DCHECK_EQ(0, start & CachePage::kPageMask);
            offset = 0;
        }
        if (size != 0) {
            FlushOnePage(i_cache, start, size);
        }
    }

    // Returns the CachePage registered for |page| in |i_cache|, allocating and
    // registering a new one if the map has no page for that key yet.
    CachePage* Simulator::GetCachePage(base::CustomMatcherHashMap* i_cache,
        void* page)
    {
        base::HashMap::Entry* const slot = i_cache->LookupOrInsert(page, ICacheHash(page));
        if (slot->value == nullptr) {
            // First access to this page.
            slot->value = new CachePage();
        }
        return reinterpret_cast<CachePage*>(slot->value);
    }

    // Marks the cache lines in [start, start + size) as invalid. The range must
    // be line-aligned and must not cross a page boundary (checked in debug
    // builds).
    void Simulator::FlushOnePage(base::CustomMatcherHashMap* i_cache,
        intptr_t start, int size)
    {
        DCHECK_LE(size, CachePage::kPageSize);
        DCHECK(AllOnOnePage(start, size - 1));
        DCHECK_EQ(start & CachePage::kLineMask, 0);
        DCHECK_EQ(size & CachePage::kLineMask, 0);

        const int page_offset = (start & CachePage::kPageMask);
        void* const page_start = reinterpret_cast<void*>(start & (~CachePage::kPageMask));
        CachePage* const cache_page = GetCachePage(i_cache, page_start);

        // One validity byte per cache line.
        const int line_count = size >> CachePage::kLineShift;
        char* const validity = cache_page->ValidityByte(page_offset);
        memset(validity, CachePage::LINE_INVALID, line_count);
    }

    // Validates |instr| against the simulated i-cache. On a hit the instruction
    // in memory must equal the cached copy (CHECK failure otherwise); on a miss
    // the containing cache line is loaded from memory and marked valid.
    void Simulator::CheckICache(base::CustomMatcherHashMap* i_cache,
        Instruction* instr)
    {
        const intptr_t address = reinterpret_cast<intptr_t>(instr);
        const int page_offset = (address & CachePage::kPageMask);
        void* const page_start = reinterpret_cast<void*>(address & (~CachePage::kPageMask));
        void* const line_start = reinterpret_cast<void*>(address & (~CachePage::kLineMask));

        CachePage* const cache_page = GetCachePage(i_cache, page_start);
        char* const validity = cache_page->ValidityByte(page_offset);
        const bool is_miss = (*validity != CachePage::LINE_VALID);
        char* const line_copy = cache_page->CachedData(page_offset & ~CachePage::kLineMask);

        if (is_miss) {
            // Cache miss.  Load memory into the cache.
            memcpy(line_copy, line_start, CachePage::kLineLength);
            *validity = CachePage::LINE_VALID;
            return;
        }

        // Cache hit: the data in memory must match the contents of the I-cache.
        CHECK_EQ(0, memcmp(reinterpret_cast<void*>(instr), cache_page->CachedData(page_offset), kInstrSize));
    }

    // Creates a simulator for |isolate|: allocates the simulated stack and
    // resets the bookkeeping and architectural state.
    Simulator::Simulator(Isolate* isolate)
        : isolate_(isolate)
    {
        // Simulator support comes first; some of it is needed below to set up
        // the architecture state.
        const size_t stack_size = 1 * 1024 * 1024; // 1MB of simulated stack
        stack_ = reinterpret_cast<char*>(malloc(stack_size));
        pc_modified_ = false;
        icount_ = 0;
        break_pc_ = nullptr;
        break_instr_ = 0;
        last_debugger_input_ = nullptr;

        // Architecture state: every core register and condition flag starts
        // out cleared.
        for (int reg = 0; reg < num_registers; ++reg) {
            registers_[reg] = 0;
        }
        n_flag_ = false;
        z_flag_ = false;
        c_flag_ = false;
        v_flag_ = false;

        // VFP state. vfp_registers_ is cleared as num_d_registers * 2 words,
        // which covers both the s and the d view since they share the same
        // physical registers in the target.
        const int vfp_word_count = num_d_registers * 2;
        for (int word = 0; word < vfp_word_count; ++word) {
            vfp_registers_[word] = 0;
        }
        n_flag_FPSCR_ = false;
        z_flag_FPSCR_ = false;
        c_flag_FPSCR_ = false;
        v_flag_FPSCR_ = false;
        FPSCR_rounding_mode_ = RN;
        FPSCR_default_NaN_mode_ = false;

        // VFP exception flags.
        inv_op_vfp_flag_ = false;
        div_zero_vfp_flag_ = false;
        overflow_vfp_flag_ = false;
        underflow_vfp_flag_ = false;
        inexact_vfp_flag_ = false;

        // sp starts near the bottom (high address) of the allocated stack area;
        // a small buffer is left below it as a safety margin against potential
        // stack underflows.
        registers_[sp] = reinterpret_cast<int32_t>(stack_) + stack_size - 64;
        // lr and pc start at a known bad value that will cause an access
        // violation if the simulator ever tries to execute it.
        registers_[lr] = bad_lr;
        registers_[pc] = bad_lr;
    }

    // Unregisters this simulator's processor from the global monitor and
    // releases the buffers the simulator owns.
    Simulator::~Simulator()
    {
        GlobalMonitor::Get()->RemoveProcessor(&global_monitor_processor_);
        free(stack_);
        // The last debugger command line is owned by the simulator (see
        // set_last_debugger_input) and was previously never released. It is
        // nullptr when the debugger was never used; set_last_debugger_input
        // already passes that initial nullptr to DeleteArray.
        DeleteArray(last_debugger_input_);
        last_debugger_input_ = nullptr;
    }

    // Returns the Simulator stored in |isolate|'s per-thread data for the
    // calling thread, creating and storing one on first use.
    Simulator* Simulator::current(Isolate* isolate)
    {
        v8::internal::Isolate::PerIsolateThreadData* const thread_data = isolate->FindOrAllocatePerThreadDataForThisThread();
        DCHECK_NOT_NULL(thread_data);

        Simulator* simulator = thread_data->simulator();
        if (simulator != nullptr) {
            return simulator;
        }

        // TODO(146): delete the simulator object when a thread/isolate goes away.
        simulator = new Simulator(isolate);
        thread_data->set_simulator(simulator);
        return simulator;
    }

    // Writes |value| to core register |reg|. A write to the pc additionally
    // sets pc_modified_.
    void Simulator::set_register(int reg, int32_t value)
    {
        DCHECK((reg >= 0) && (reg < num_registers));
        registers_[reg] = value;
        if (reg == pc) {
            pc_modified_ = true;
        }
    }

    // Reads core register |reg|. Reading the pc yields the stored value plus
    // Instruction::kPcLoadDelta.
    int32_t Simulator::get_register(int reg) const
    {
        DCHECK((reg >= 0) && (reg < num_registers));
        // Redundant range check kept to work around a GCC bug.
        // See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43949
        if (reg >= num_registers) {
            return 0;
        }
        const int32_t stored = registers_[reg];
        if (reg == pc) {
            return stored + Instruction::kPcLoadDelta;
        }
        return stored;
    }

    // Returns the double whose bit pattern is stored in the two consecutive
    // core registers starting at the even-numbered register |reg|.
    double Simulator::get_double_from_register_pair(int reg)
    {
        DCHECK((reg >= 0) && (reg < num_registers) && ((reg % 2) == 0));

        // Copy the raw bits of both registers into the double.
        double result = 0.0;
        memcpy(&result, &registers_[reg], 2 * sizeof(registers_[0]));
        return result;
    }

    // Stores the raw bits of |*value| into the core register pair
    // (reg, reg + 1). |reg| must be an even register index. pc_modified_ is not
    // touched by this raw copy.
    void Simulator::set_register_pair_from_double(int reg, double* value)
    {
        DCHECK((reg >= 0) && (reg < num_registers) && ((reg % 2) == 0));
        memcpy(&registers_[reg], value, sizeof(double));
    }

    void Simulator::set_dw_register(int dreg, const int* dbl)
    {
        DCHECK((dreg >= 0) && (dreg < num_d_registers));
        registers_[dreg] = dbl[0];
        registers_[dreg + 1] = dbl[1];
    }

    // Copies the 64 bits of double register |dreg| into |*value|. A D register
    // occupies two consecutive vfp_registers_ entries starting at 2 * dreg.
    void Simulator::get_d_register(int dreg, uint64_t* value)
    {
        DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
        memcpy(value, &vfp_registers_[2 * dreg], sizeof(uint64_t));
    }

    // Overwrites double register |dreg| with the 64 bits read from |*value|. A D
    // register occupies two consecutive vfp_registers_ entries starting at
    // 2 * dreg.
    void Simulator::set_d_register(int dreg, const uint64_t* value)
    {
        DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
        memcpy(&vfp_registers_[2 * dreg], value, sizeof(uint64_t));
    }

    // Copies double register |dreg| into the two 32-bit words value[0] and
    // value[1]; |value| must therefore point at storage for two uint32_t.
    void Simulator::get_d_register(int dreg, uint32_t* value)
    {
        DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
        memcpy(value, &vfp_registers_[2 * dreg], 2 * sizeof(uint32_t));
    }

    // Overwrites double register |dreg| with the two 32-bit words value[0] and
    // value[1]; |value| must therefore point at two readable uint32_t.
    void Simulator::set_d_register(int dreg, const uint32_t* value)
    {
        DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
        memcpy(&vfp_registers_[2 * dreg], value, 2 * sizeof(uint32_t));
    }

    // Copies SIZE bytes of NEON register |reg| into |value|. SIZE selects the
    // register width (kSimd128Size or kDoubleSize); the register starts
    // reg * (SIZE / 4) entries into vfp_registers_.
    template <typename T, int SIZE>
    void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)])
    {
        DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
        DCHECK_LE(0, reg);
        DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
        const int first_word = reg * (SIZE / 4);
        memcpy(value, &vfp_registers_[first_word], SIZE);
    }

    // Overwrites NEON register |reg| with SIZE bytes taken from |value|. SIZE
    // selects the register width (kSimd128Size or kDoubleSize); the register
    // starts reg * (SIZE / 4) entries into vfp_registers_.
    template <typename T, int SIZE>
    void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)])
    {
        DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
        DCHECK_LE(0, reg);
        DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
        const int first_word = reg * (SIZE / 4);
        memcpy(&vfp_registers_[first_word], value, SIZE);
    }

    // Raw write of the PC register: stores |value| unchanged and records that
    // the PC was modified.
    void Simulator::set_pc(int32_t value)
    {
        registers_[pc] = value;
        pc_modified_ = true;
    }

    // Reports whether the stored PC equals one of the two sentinel values
    // bad_lr or end_sim_pc.
    bool Simulator::has_bad_pc() const
    {
        const auto stored_pc = registers_[pc];
        if (stored_pc == bad_lr) {
            return true;
        }
        return stored_pc == end_sim_pc;
    }

    // Raw access to the PC register without the special adjustment when reading.
    // Unlike get_register(pc), which adds Instruction::kPcLoadDelta, this returns
    // the stored value exactly as it sits in registers_[pc].
    int32_t Simulator::get_pc() const
    {
        return registers_[pc];
    }

    // Getting from and setting into VFP registers.

    // Stores the raw 32-bit pattern |value| into single-precision register
    // |sreg|, i.e. into entry |sreg| of vfp_registers_.
    void Simulator::set_s_register(int sreg, unsigned int value)
    {
        DCHECK((sreg >= 0) && (sreg < num_s_registers));
        vfp_registers_[sreg] = value;
    }

    // Returns the raw 32-bit pattern held in single-precision register |sreg|,
    // i.e. entry |sreg| of vfp_registers_.
    unsigned int Simulator::get_s_register(int sreg) const
    {
        DCHECK((sreg >= 0) && (sreg < num_s_registers));
        return vfp_registers_[sreg];
    }

    // Copies the bytes of |value| into VFP register |reg_index|. |register_size|
    // is the register width counted in vfp_registers_ entries (1 is checked
    // against the S register count, 2 against the D register count);
    // sizeof(InputType) must match that width exactly.
    template <class InputType, int register_size>
    void Simulator::SetVFPRegister(int reg_index, const InputType& value)
    {
        unsigned bytes = register_size * sizeof(vfp_registers_[0]);
        DCHECK_EQ(sizeof(InputType), bytes);
        DCHECK_GE(reg_index, 0);
        switch (register_size) {
        case 1:
            DCHECK(reg_index < num_s_registers);
            break;
        case 2:
            DCHECK(reg_index < DwVfpRegister::NumRegisters());
            break;
        default:
            break;
        }

        memcpy(vfp_registers_ + reg_index * register_size, &value, bytes);
    }

    // Returns the contents of VFP register |reg_index| reinterpreted as
    // ReturnType. |register_size| is the register width counted in
    // vfp_registers_ entries (1 is checked against the S register count, 2
    // against the D register count); sizeof(ReturnType) must match that width.
    template <class ReturnType, int register_size>
    ReturnType Simulator::GetFromVFPRegister(int reg_index)
    {
        unsigned bytes = register_size * sizeof(vfp_registers_[0]);
        DCHECK_EQ(sizeof(ReturnType), bytes);
        DCHECK_GE(reg_index, 0);
        switch (register_size) {
        case 1:
            DCHECK(reg_index < num_s_registers);
            break;
        case 2:
            DCHECK(reg_index < DwVfpRegister::NumRegisters());
            break;
        default:
            break;
        }

        ReturnType contents;
        memcpy(&contents, vfp_registers_ + register_size * reg_index, bytes);
        return contents;
    }

    // Loads the N, Z, C and V flags from bits 31..28 of |value|. Only writes to
    // CPSR_f whose value has no bits outside kSpecialCondition are supported;
    // anything else hits UNIMPLEMENTED().
    void Simulator::SetSpecialRegister(SRegisterFieldMask reg_and_mask,
        uint32_t value)
    {
        const bool supported = (reg_and_mask == CPSR_f) && ((value & ~kSpecialCondition) == 0);
        if (!supported) {
            UNIMPLEMENTED();
        } else {
            n_flag_ = ((value >> 31) & 1u) != 0;
            z_flag_ = ((value >> 30) & 1u) != 0;
            c_flag_ = ((value >> 29) & 1u) != 0;
            v_flag_ = ((value >> 28) & 1u) != 0;
        }
    }

    // Returns the N, Z, C and V flags packed into bits 31..28 of the result, with
    // all other bits zero. Only reading CPSR is supported; any other register
    // hits UNIMPLEMENTED().
    uint32_t Simulator::GetFromSpecialRegister(SRegister reg)
    {
        if (reg != CPSR) {
            UNIMPLEMENTED();
            return 0;
        }

        uint32_t flags = 0;
        flags |= (n_flag_ ? 1u : 0u) << 31;
        flags |= (z_flag_ ? 1u : 0u) << 30;
        flags |= (c_flag_ ? 1u : 0u) << 29;
        flags |= (v_flag_ ? 1u : 0u) << 28;
        return flags;
    }

    // Runtime FP routines take:
    // - two double arguments
    // - one double argument and zero or one integer arguments.
    // All are constructed here from r0-r3 or d0, d1 and r0. Every output is
    // always filled in; the callee uses whichever ones its signature needs.
    void Simulator::GetFpArgs(double* x, double* y, int32_t* z)
    {
        if (!use_eabi_hardfloat()) {
            // Soft-float: r0/r1 hold x, r2/r3 hold y, and r2 doubles as z.
            *x = get_double_from_register_pair(0);
            *y = get_double_from_register_pair(2);
            *z = get_register(2);
            return;
        }

        // Hard-float: x and y come from d0 and d1, z from r0.
        *x = get_double_from_d_register(0).get_scalar();
        *y = get_double_from_d_register(1).get_scalar();
        *z = get_register(0);
    }

    // Stores a double return value: its raw bits go to d0 when the hard-float
    // ABI is in use, otherwise to the r0/r1 pair.
    void Simulator::SetFpResult(const double& result)
    {
        if (use_eabi_hardfloat()) {
            // d0 is the first two entries of vfp_registers_.
            memcpy(vfp_registers_, &result, 2 * sizeof(vfp_registers_[0]));
            return;
        }
        // r0 and r1 are the first two entries of registers_.
        memcpy(registers_, &result, 2 * sizeof(registers_[0]));
    }

    // Overwrites r2, r3 and r12 with a recognizable junk value. r0 and r1 are
    // left alone because they carry the return value.
    void Simulator::TrashCallerSaveRegisters()
    {
        static const int kTrashedRegisters[] = { 2, 3, 12 };
        for (int reg : kTrashedRegisters) {
            registers_[reg] = 0x50BAD4U;
        }
    }

    int Simulator::ReadW(int32_t addr)
    {
        // All supported ARM targets allow unaligned accesses, so we don't need to
        // check the alignment here.
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        intptr_t* ptr = reinterpret_cast<intptr_t*>(addr);
        return *ptr;
    }

    // Exclusive word load from the host address |addr|. Under the global monitor
    // mutex, the access is recorded with the local monitor (as a Word
    // transaction) and with the global monitor before the value is read.
    int Simulator::ReadExW(int32_t addr)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyLoadExcl(addr, TransactionSize::Word);
        global_monitor->NotifyLoadExcl_Locked(addr, &global_monitor_processor_);
        return *reinterpret_cast<intptr_t*>(addr);
    }

    // Plain word store of |value| to the host address |addr|. Under the global
    // monitor mutex, the store is reported to both the local and the global
    // monitor before memory is written.
    void Simulator::WriteW(int32_t addr, int value)
    {
        // All supported ARM targets allow unaligned accesses, so the alignment
        // is not checked here.
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        *reinterpret_cast<intptr_t*>(addr) = value;
    }

    // Exclusive word store of |value| to the host address |addr|. The store is
    // performed only if the local monitor and then the global monitor both
    // accept it (the global monitor is not consulted if the local one refuses).
    // Returns 0 when the store happened and 1 when it was refused.
    int Simulator::WriteExW(int32_t addr, int value)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        const bool store_allowed = local_monitor_.NotifyStoreExcl(addr, TransactionSize::Word)
            && global_monitor->NotifyStoreExcl_Locked(addr, &global_monitor_processor_);
        if (!store_allowed) {
            return 1;
        }
        *reinterpret_cast<intptr_t*>(addr) = value;
        return 0;
    }

    uint16_t Simulator::ReadHU(int32_t addr)
    {
        // All supported ARM targets allow unaligned accesses, so we don't need to
        // check the alignment here.
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        uint16_t* ptr = reinterpret_cast<uint16_t*>(addr);
        return *ptr;
    }

    int16_t Simulator::ReadH(int32_t addr)
    {
        // All supported ARM targets allow unaligned accesses, so we don't need to
        // check the alignment here.
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        int16_t* ptr = reinterpret_cast<int16_t*>(addr);
        return *ptr;
    }

    // Exclusive unsigned halfword load from the host address |addr|. Under the
    // global monitor mutex, the access is recorded with the local monitor (as a
    // HalfWord transaction) and with the global monitor before the value is read.
    uint16_t Simulator::ReadExHU(int32_t addr)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyLoadExcl(addr, TransactionSize::HalfWord);
        global_monitor->NotifyLoadExcl_Locked(addr, &global_monitor_processor_);
        return *reinterpret_cast<uint16_t*>(addr);
    }

    // Plain unsigned halfword store of |value| to the host address |addr|. Under
    // the global monitor mutex, the store is reported to both the local and the
    // global monitor before memory is written.
    void Simulator::WriteH(int32_t addr, uint16_t value)
    {
        // All supported ARM targets allow unaligned accesses, so the alignment
        // is not checked here.
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        *reinterpret_cast<uint16_t*>(addr) = value;
    }

    // Plain signed halfword store of |value| to the host address |addr|. Under
    // the global monitor mutex, the store is reported to both the local and the
    // global monitor before memory is written.
    void Simulator::WriteH(int32_t addr, int16_t value)
    {
        // All supported ARM targets allow unaligned accesses, so the alignment
        // is not checked here.
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        *reinterpret_cast<int16_t*>(addr) = value;
    }

    // Exclusive halfword store of |value| to the host address |addr|. The store
    // is performed only if the local monitor and then the global monitor both
    // accept it (the global monitor is not consulted if the local one refuses).
    // Returns 0 when the store happened and 1 when it was refused.
    int Simulator::WriteExH(int32_t addr, uint16_t value)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        const bool store_allowed = local_monitor_.NotifyStoreExcl(addr, TransactionSize::HalfWord)
            && global_monitor->NotifyStoreExcl_Locked(addr, &global_monitor_processor_);
        if (!store_allowed) {
            return 1;
        }
        *reinterpret_cast<uint16_t*>(addr) = value;
        return 0;
    }

    uint8_t Simulator::ReadBU(int32_t addr)
    {
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        uint8_t* ptr = reinterpret_cast<uint8_t*>(addr);
        return *ptr;
    }

    int8_t Simulator::ReadB(int32_t addr)
    {
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        int8_t* ptr = reinterpret_cast<int8_t*>(addr);
        return *ptr;
    }

    // Exclusive unsigned byte load from the host address |addr|. Under the global
    // monitor mutex, the access is recorded with the local monitor (as a Byte
    // transaction) and with the global monitor before the value is read.
    uint8_t Simulator::ReadExBU(int32_t addr)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyLoadExcl(addr, TransactionSize::Byte);
        global_monitor->NotifyLoadExcl_Locked(addr, &global_monitor_processor_);
        return *reinterpret_cast<uint8_t*>(addr);
    }

    // Plain unsigned byte store of |value| to the host address |addr|. Under the
    // global monitor mutex, the store is reported to both the local and the
    // global monitor before memory is written.
    void Simulator::WriteB(int32_t addr, uint8_t value)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        *reinterpret_cast<uint8_t*>(addr) = value;
    }

    // Plain signed byte store of |value| to the host address |addr|. Under the
    // global monitor mutex, the store is reported to both the local and the
    // global monitor before memory is written.
    void Simulator::WriteB(int32_t addr, int8_t value)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        *reinterpret_cast<int8_t*>(addr) = value;
    }

    // Exclusive byte store of |value| to the host address |addr|. The store is
    // performed only if the local monitor and then the global monitor both
    // accept it (the global monitor is not consulted if the local one refuses).
    // Returns 0 when the store happened and 1 when it was refused.
    int Simulator::WriteExB(int32_t addr, uint8_t value)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        const bool store_allowed = local_monitor_.NotifyStoreExcl(addr, TransactionSize::Byte)
            && global_monitor->NotifyStoreExcl_Locked(addr, &global_monitor_processor_);
        if (!store_allowed) {
            return 1;
        }
        *reinterpret_cast<uint8_t*>(addr) = value;
        return 0;
    }

    int32_t* Simulator::ReadDW(int32_t addr)
    {
        // All supported ARM targets allow unaligned accesses, so we don't need to
        // check the alignment here.
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        local_monitor_.NotifyLoad(addr);
        int32_t* ptr = reinterpret_cast<int32_t*>(addr);
        return ptr;
    }

    // Exclusive doubleword load. Under the global monitor mutex, records the
    // access with the local monitor (as a DoubleWord transaction) and with the
    // global monitor, then returns |addr| as a pointer to 32-bit words; the
    // memory itself is not read here, the caller dereferences the result.
    int32_t* Simulator::ReadExDW(int32_t addr)
    {
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyLoadExcl(addr, TransactionSize::DoubleWord);
        global_monitor->NotifyLoadExcl_Locked(addr, &global_monitor_processor_);
        return reinterpret_cast<int32_t*>(addr);
    }

    // Plain doubleword store: writes |value1| to the 32-bit word at |addr| and
    // |value2| to the following word. Under the global monitor mutex, the store
    // is reported to both the local and the global monitor before memory is
    // written.
    void Simulator::WriteDW(int32_t addr, int32_t value1, int32_t value2)
    {
        // All supported ARM targets allow unaligned accesses, so the alignment
        // is not checked here.
        auto* const global_monitor = GlobalMonitor::Get();
        base::MutexGuard lock_guard(&global_monitor->mutex);
        local_monitor_.NotifyStore(addr);
        global_monitor->NotifyStore_Locked(addr, &global_monitor_processor_);
        int32_t* const words = reinterpret_cast<int32_t*>(addr);
        words[0] = value1;
        words[1] = value2;
    }

    // Exclusive doubleword store: writes |value1| to the 32-bit word at |addr|
    // and |value2| to the following word, but only if the local monitor and then
    // the global monitor both accept the store (the global monitor is not
    // consulted if the local one refuses).
    // Returns 0 when the store happened and 1 when it was refused.
    int Simulator::WriteExDW(int32_t addr, int32_t value1, int32_t value2)
    {
        base::MutexGuard lock_guard(&GlobalMonitor::Get()->mutex);
        if (local_monitor_.NotifyStoreExcl(addr, TransactionSize::DoubleWord) && GlobalMonitor::Get()->NotifyStoreExcl_Locked(addr, &global_monitor_processor_)) {
            // The two values are 32-bit words, so step through memory with an
            // exact-width int32_t pointer (as WriteDW does) rather than with a
            // host-pointer-sized intptr_t.
            int32_t* ptr = reinterpret_cast<int32_t*>(addr);
            *ptr++ = value1;
            *ptr = value2;
            return 0;
        } else {
            return 1;
        }
    }

    // Returns the limit of the stack area to enable checking for stack overflows.
    //
    // The simulator uses a separate JS stack. When the C stack is already past
    // |c_limit|, the current simulated sp is returned so the exhaustion is also
    // reflected on the JS stack. Otherwise the limit is the start of the JS
    // stack plus a 1024-byte safety margin that prevents overrunning the stack
    // when pushing values.
    uintptr_t Simulator::StackLimit(uintptr_t c_limit) const
    {
        const bool c_stack_exhausted = GetCurrentStackPosition() < c_limit;
        return c_stack_exhausted
            ? reinterpret_cast<uintptr_t>(get_sp())
            : reinterpret_cast<uintptr_t>(stack_) + 1024;
    }

    // Reports an unsupported instruction: prints the address of |instr| together
    // with the |format| text and then stops via UNIMPLEMENTED().
    void Simulator::Format(Instruction* instr, const char* format)
    {
        const intptr_t instr_address = reinterpret_cast<intptr_t>(instr);
        PrintF("Simulator found unsupported instruction:\n 0x%08" V8PRIxPTR ": %s\n",
            instr_address, format);
        UNIMPLEMENTED();
    }

    // Evaluates the condition field of |instr| against the current N, Z, C and V
    // flags and returns whether the instruction should be executed.
    bool Simulator::ConditionallyExecute(Instruction* instr)
    {
        // Shared sub-conditions: unsigned "higher" and signed "greater or equal".
        const bool unsigned_higher = c_flag_ && !z_flag_;
        const bool signed_ge = (n_flag_ == v_flag_);

        switch (instr->ConditionField()) {
        case eq:
            return z_flag_;
        case ne:
            return !z_flag_;
        case cs:
            return c_flag_;
        case cc:
            return !c_flag_;
        case mi:
            return n_flag_;
        case pl:
            return !n_flag_;
        case vs:
            return v_flag_;
        case vc:
            return !v_flag_;
        case hi:
            return unsigned_higher;
        case ls:
            return !unsigned_higher;
        case ge:
            return signed_ge;
        case lt:
            return !signed_ge;
        case gt:
            return signed_ge && !z_flag_;
        case le:
            return !signed_ge || z_flag_;
        case al:
            return true;
        default:
            UNREACHABLE();
        }
        return false;
    }

    // Derives the Zero and Negative flags from |val|: Z is set when |val| is
    // zero, N when it is negative.
    void Simulator::SetNZFlags(int32_t val)
    {
        z_flag_ = (val == 0);
        n_flag_ = (val < 0);
    }

    // Set the Carry flag to |val|. No other flag is touched.
    void Simulator::SetCFlag(bool val)
    {
        c_flag_ = val;
    }

    // Set the oVerflow flag to |val|. No other flag is touched.
    void Simulator::SetVFlag(bool val)
    {
        v_flag_ = val;
    }

    // Calculate C flag value for additions: returns true when the unsigned sum
    // left + right (+ 1 if |carry| is non-zero) does not fit in 32 bits.
    bool Simulator::CarryFrom(int32_t left, int32_t right, int32_t carry)
    {
        // Add in 64 bits so the carry out of bit 31 shows up as a value above
        // the largest 32-bit number.
        const uint64_t carry_in = (carry != 0) ? 1u : 0u;
        const uint64_t wide_sum = static_cast<uint64_t>(static_cast<uint32_t>(left))
            + static_cast<uint64_t>(static_cast<uint32_t>(right))
            + carry_in;
        return wide_sum > 0xFFFFFFFFull;
    }

    // Calculate C flag value for subtractions: returns true when the unsigned
    // subtraction left - right (minus one more if |carry| is zero) borrows.
    bool Simulator::BorrowFrom(int32_t left, int32_t right, int32_t carry)
    {
        const uint32_t minuend = static_cast<uint32_t>(left);
        const uint32_t subtrahend = static_cast<uint32_t>(right);

        if (carry != 0) {
            return subtrahend > minuend;
        }
        // With no incoming carry one extra unit is subtracted, so equal
        // operands already borrow.
        return subtrahend >= minuend;
    }

    // Calculate V flag value for additions and subtractions. |alu_out| is the
    // 32-bit result of combining |left| and |right|; |addition| selects which
    // rule applies.
    bool Simulator::OverflowFrom(int32_t alu_out,
        int32_t left, int32_t right, bool addition)
    {
        const bool left_negative = (left < 0);
        const bool right_negative = (right < 0);
        const bool result_negative = (alu_out < 0);

        // Addition overflows only when both operands share a sign; subtraction
        // only when their signs differ.
        const bool operand_signs_match = (left_negative == right_negative);
        const bool operands_can_overflow = addition ? operand_signs_match : !operand_signs_match;

        // In both cases the result must also have a different sign than the
        // first operand.
        const bool result_sign_flipped = (left_negative != result_negative);
        return operands_can_overflow && result_sign_flipped;
    }

    // Support for VFP comparisons.
    // Sets the FPSCR N, Z, C and V flags from comparing two floats:
    //   unordered (either is NaN): N=0 Z=0 C=1 V=1
    //   val1 == val2:              N=0 Z=1 C=1 V=0
    //   val1 <  val2:              N=1 Z=0 C=0 V=0
    //   val1 >  val2:              N=0 Z=0 C=1 V=0
    void Simulator::Compute_FPSCR_Flags(float val1, float val2)
    {
        const bool unordered = isnan(val1) || isnan(val2);
        // The comparisons are only evaluated for non-NaN operands.
        n_flag_FPSCR_ = !unordered && (val1 < val2);
        z_flag_FPSCR_ = !unordered && (val1 == val2);
        c_flag_FPSCR_ = unordered || !(val1 < val2);
        v_flag_FPSCR_ = unordered;
    }

    // Sets the FPSCR N, Z, C and V flags from comparing two doubles:
    //   unordered (either is NaN): N=0 Z=0 C=1 V=1
    //   val1 == val2:              N=0 Z=1 C=1 V=0
    //   val1 <  val2:              N=1 Z=0 C=0 V=0
    //   val1 >  val2:              N=0 Z=0 C=1 V=0
    void Simulator::Compute_FPSCR_Flags(double val1, double val2)
    {
        const bool unordered = isnan(val1) || isnan(val2);
        // The comparisons are only evaluated for non-NaN operands.
        n_flag_FPSCR_ = !unordered && (val1 < val2);
        z_flag_FPSCR_ = !unordered && (val1 == val2);
        c_flag_FPSCR_ = unordered || !(val1 < val2);
        v_flag_FPSCR_ = unordered;
    }

    // Copies the four FPSCR condition flags (N, Z, C, V) into the corresponding
    // integer condition flags, so that a following conditional instruction is
    // evaluated against the result of the last VFP comparison.
    void Simulator::Copy_FPSCR_to_APSR()
    {
        n_flag_ = n_flag_FPSCR_;
        z_flag_ = z_flag_FPSCR_;
        c_flag_ = c_flag_FPSCR_;
        v_flag_ = v_flag_FPSCR_;
    }

    // Addressing Mode 1 - Data-processing operands:
    // Get the value based on the shifter_operand with register.
    //
    // Reads Rm and applies the shift selected by the instruction. Bit 4 chooses
    // between a shift by the immediate shift amount (bit 4 == 0) and a shift by
    // the low byte of register Rs (bit 4 == 1). The shifted value is returned
    // and the shifter carry-out is stored in |*carry_out|.
    //
    // All shifts that could move bits through the sign bit, and all shifts whose
    // count could reach 32 or more, are done on unsigned values with in-range
    // counts so that no undefined shift is ever executed on the host.
    int32_t Simulator::GetShiftRm(Instruction* instr, bool* carry_out)
    {
        ShiftOp shift = instr->ShiftField();
        int shift_amount = instr->ShiftAmountValue();
        int32_t result = get_register(instr->RmValue());
        if (instr->Bit(4) == 0) {
            // by immediate
            if ((shift == ROR) && (shift_amount == 0)) {
                // ROR with a zero immediate is not implemented by the simulator.
                UNIMPLEMENTED();
                return result;
            } else if (((shift == LSR) || (shift == ASR)) && (shift_amount == 0)) {
                // A zero immediate is treated as a shift by 32 for LSR and ASR.
                shift_amount = 32;
            }
            switch (shift) {
            case ASR: {
                if (shift_amount == 0) {
                    if (result < 0) {
                        result = 0xFFFFFFFF;
                        *carry_out = true;
                    } else {
                        result = 0;
                        *carry_out = false;
                    }
                } else {
                    // Shift in two steps so the last bit shifted out can be
                    // captured as the carry.
                    result >>= (shift_amount - 1);
                    *carry_out = (result & 1) == 1;
                    result >>= 1;
                }
                break;
            }

            case LSL: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else {
                    // Shift as unsigned: left-shifting a negative signed value
                    // is undefined.
                    uint32_t uresult = static_cast<uint32_t>(result);
                    uresult <<= (shift_amount - 1);
                    *carry_out = (uresult >> 31) != 0;
                    uresult <<= 1;
                    result = static_cast<int32_t>(uresult);
                }
                break;
            }

            case LSR: {
                if (shift_amount == 0) {
                    result = 0;
                    *carry_out = c_flag_;
                } else {
                    uint32_t uresult = static_cast<uint32_t>(result);
                    uresult >>= (shift_amount - 1);
                    *carry_out = (uresult & 1) == 1;
                    uresult >>= 1;
                    result = static_cast<int32_t>(uresult);
                }
                break;
            }

            case ROR: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else {
                    // shift_amount is in [1, 31] here, so both shifts are
                    // well defined.
                    uint32_t left = static_cast<uint32_t>(result) >> shift_amount;
                    uint32_t right = static_cast<uint32_t>(result) << (32 - shift_amount);
                    result = right | left;
                    *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
                }
                break;
            }

            default: {
                UNREACHABLE();
                break;
            }
            }
        } else {
            // by register
            int rs = instr->RsValue();
            // Only the low byte of Rs is used, so shift_amount is in [0, 255].
            shift_amount = get_register(rs) & 0xFF;
            switch (shift) {
            case ASR: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else if (shift_amount < 32) {
                    result >>= (shift_amount - 1);
                    *carry_out = (result & 1) == 1;
                    result >>= 1;
                } else {
                    DCHECK_GE(shift_amount, 32);
                    if (result < 0) {
                        *carry_out = true;
                        result = 0xFFFFFFFF;
                    } else {
                        *carry_out = false;
                        result = 0;
                    }
                }
                break;
            }

            case LSL: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else if (shift_amount < 32) {
                    // Shift as unsigned: left-shifting a negative signed value
                    // is undefined.
                    uint32_t uresult = static_cast<uint32_t>(result);
                    uresult <<= (shift_amount - 1);
                    *carry_out = (uresult >> 31) != 0;
                    uresult <<= 1;
                    result = static_cast<int32_t>(uresult);
                } else if (shift_amount == 32) {
                    *carry_out = (result & 1) == 1;
                    result = 0;
                } else {
                    DCHECK_GT(shift_amount, 32);
                    *carry_out = false;
                    result = 0;
                }
                break;
            }

            case LSR: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else if (shift_amount < 32) {
                    uint32_t uresult = static_cast<uint32_t>(result);
                    uresult >>= (shift_amount - 1);
                    *carry_out = (uresult & 1) == 1;
                    uresult >>= 1;
                    result = static_cast<int32_t>(uresult);
                } else if (shift_amount == 32) {
                    *carry_out = (result < 0);
                    result = 0;
                } else {
                    *carry_out = false;
                    result = 0;
                }
                break;
            }

            case ROR: {
                if (shift_amount == 0) {
                    *carry_out = c_flag_;
                } else {
                    // The count can be as large as 255, but a 32-bit rotation
                    // only depends on its low five bits. Rotating by a multiple
                    // of 32 leaves the value unchanged, and shifting a 32-bit
                    // value by 32 or more is undefined, so reduce the count
                    // first and skip the shifts when nothing moves.
                    const int rotation = shift_amount & 31;
                    if (rotation != 0) {
                        const uint32_t bits = static_cast<uint32_t>(result);
                        result = static_cast<int32_t>((bits >> rotation) | (bits << (32 - rotation)));
                    }
                    *carry_out = (static_cast<uint32_t>(result) >> 31) != 0;
                }
                break;
            }

            default: {
                UNREACHABLE();
                break;
            }
            }
        }
        return result;
    }

    // Addressing Mode 1 - Data-processing operands:
    // Get the value based on the shifter_operand with immediate.
    //
    // The 8-bit immediate is rotated right by twice the rotate field. With no
    // rotation the carry-out is the current C flag; otherwise it is the sign bit
    // of the rotated value.
    int32_t Simulator::GetImm(Instruction* instr, bool* carry_out)
    {
        const int rotation = instr->RotateValue() * 2;
        const int immediate = instr->Immed8Value();
        const int rotated = base::bits::RotateRight32(immediate, rotation);
        if (rotation == 0) {
            *carry_out = c_flag_;
        } else {
            *carry_out = (rotated < 0);
        }
        return rotated;
    }

    // Returns the number of set bits in |bit_vector|.
    //
    // The value is examined as an unsigned 32-bit pattern. Counting on the
    // signed value with an arithmetic right shift never reaches zero for a
    // negative input, so such a call would not terminate; here a negative input
    // simply has its sign bit counted like any other bit.
    static int count_bits(int bit_vector)
    {
        uint32_t remaining = static_cast<uint32_t>(bit_vector);
        int count = 0;
        while (remaining != 0) {
            // Clear the lowest set bit; one iteration per set bit.
            remaining &= remaining - 1;
            count++;
        }
        return count;
    }

    // Computes the memory range touched by a load/store-multiple style
    // instruction that transfers |num_regs| items of |reg_size| bytes relative
    // to Rn, according to the instruction's PU field.
    //
    // On return |*start_address| is the address of the first item and
    // |*end_address| the address of the last item. The return value is the
    // updated Rn value that callers store back into Rn when write-back is
    // requested. The da_x mode is not implemented.
    int32_t Simulator::ProcessPU(Instruction* instr,
        int num_regs,
        int reg_size,
        intptr_t* start_address,
        intptr_t* end_address)
    {
        const int base_reg = instr->RnValue();
        int32_t rn_val = get_register(base_reg);
        // Total number of bytes covered by the transfer.
        const int block_bytes = num_regs * reg_size;
        switch (instr->PUField()) {
        case da_x: {
            UNIMPLEMENTED();
            break;
        }
        case ia_x: {
            // Increment after: the block starts at Rn.
            *start_address = rn_val;
            *end_address = rn_val + block_bytes - reg_size;
            rn_val = rn_val + block_bytes;
            break;
        }
        case db_x: {
            // Decrement before: the block ends just below Rn.
            *start_address = rn_val - block_bytes;
            *end_address = rn_val - reg_size;
            rn_val = *start_address;
            break;
        }
        case ib_x: {
            // Increment before: the block starts one item above Rn.
            *start_address = rn_val + reg_size;
            *end_address = rn_val + block_bytes;
            rn_val = *end_address;
            break;
        }
        default: {
            UNREACHABLE();
            break;
        }
        }
        return rn_val;
    }

    // Addressing Mode 4 - Load and Store Multiple
    //
    // Transfers every register named in the instruction's register list, lowest
    // register number first, to or from consecutive words starting at the
    // address computed by ProcessPU. |load| selects memory-to-register; otherwise
    // registers are stored. Rn is updated afterwards when write-back is set.
    void Simulator::HandleRList(Instruction* instr, bool load)
    {
        int rlist = instr->RlistValue();
        const int num_regs = count_bits(rlist);

        intptr_t start_address = 0;
        intptr_t end_address = 0;
        const int32_t rn_val = ProcessPU(instr, num_regs, kPointerSize, &start_address, &end_address);

        intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
        // Catch null pointers a little earlier.
        DCHECK(start_address > 8191 || start_address < 0);
        // Walk the list one bit per register; bit i set means register i takes
        // part in the transfer.
        for (int reg = 0; rlist != 0; reg++, rlist >>= 1) {
            if ((rlist & 1) == 0) {
                continue;
            }
            if (load) {
                set_register(reg, *address);
            } else {
                *address = get_register(reg);
            }
            address++;
        }
        DCHECK(end_address == ((intptr_t)address) - 4);
        if (instr->HasW()) {
            set_register(instr->RnValue(), rn_val);
        }
    }

    // Addressing Mode 6 - Load and Store Multiple Coprocessor registers.
    //
    // Transfers a run of consecutive VFP registers starting at Vd to or from
    // memory, one 32-bit word at a time through ReadW/WriteW. The Sz field
    // selects single (one word per register) or double precision (two words per
    // register); the register count comes from the 8-bit immediate, halved for
    // doubles. Rn is updated afterwards when write-back is set.
    void Simulator::HandleVList(Instruction* instr)
    {
        const bool single_precision = (instr->SzValue() == 0);
        const VFPRegPrecision precision = single_precision ? kSinglePrecision : kDoublePrecision;
        const int operand_size = single_precision ? 4 : 8;

        const bool load = (instr->VLValue() == 0x1);

        const int vd = instr->VFPDRegValue(precision);
        const int num_regs = single_precision ? instr->Immed8Value() : instr->Immed8Value() / 2;

        intptr_t start_address = 0;
        intptr_t end_address = 0;
        const int32_t rn_val = ProcessPU(instr, num_regs, operand_size, &start_address, &end_address);

        intptr_t* address = reinterpret_cast<intptr_t*>(start_address);
        const int end_reg = vd + num_regs;
        if (single_precision) {
            for (int reg = vd; reg < end_reg; reg++) {
                const int32_t word_addr = reinterpret_cast<int32_t>(address);
                if (load) {
                    set_s_register_from_sinteger(reg, ReadW(word_addr));
                } else {
                    WriteW(word_addr, get_sinteger_from_s_register(reg));
                }
                address += 1;
            }
        } else {
            for (int reg = vd; reg < end_reg; reg++) {
                const int32_t low_addr = reinterpret_cast<int32_t>(address);
                const int32_t high_addr = reinterpret_cast<int32_t>(address + 1);
                uint32_t data[2];
                if (load) {
                    // Low word first, then high word.
                    data[0] = ReadW(low_addr);
                    data[1] = ReadW(high_addr);
                    set_d_register(reg, data);
                } else {
                    get_d_register(reg, data);
                    WriteW(low_addr, data[0]);
                    WriteW(high_addr, data[1]);
                }
                address += 2;
            }
        }
        DCHECK(reinterpret_cast<intptr_t>(address) - operand_size == end_address);
        if (instr->HasW()) {
            set_register(instr->RnValue(), rn_val);
        }
    }

    // Calls into the V8 runtime are based on this very simple interface.
    // Note: To be able to return two values from some calls the code in runtime.cc
    // uses the ObjectPair which is essentially two 32-bit values stuffed into a
    // 64-bit value. With the code below we assume that all runtime calls return
    // 64 bits of result. If they don't, the r1 result register contains a bogus
    // value, which is fine because it is caller-saved.
    using SimulatorRuntimeCall = int64_t (*)(int32_t arg0, int32_t arg1,
        int32_t arg2, int32_t arg3,
        int32_t arg4, int32_t arg5,
        int32_t arg6, int32_t arg7,
        int32_t arg8);

    // Signatures of the four kinds of floating-point runtime calls: compare,
    // (double, double) -> double, double -> double and (double, int) -> double.
    using SimulatorRuntimeCompareCall = int64_t (*)(double darg0, double darg1);
    using SimulatorRuntimeFPFPCall = double (*)(double darg0, double darg1);
    using SimulatorRuntimeFPCall = double (*)(double darg0);
    using SimulatorRuntimeFPIntCall = double (*)(double darg0, int32_t arg0);

    // Signatures for direct calls into an API function native callback
    // (refer to InvocationCallback in v8.h); the profiling variant takes one
    // extra pointer argument.
    using SimulatorRuntimeDirectApiCall = void (*)(int32_t arg0);
    using SimulatorRuntimeProfilingApiCall = void (*)(int32_t arg0, void* arg1);

    // Signatures for direct calls to an accessor getter callback; the profiling
    // variant takes one extra pointer argument.
    using SimulatorRuntimeDirectGetterCall = void (*)(int32_t arg0, int32_t arg1);
    using SimulatorRuntimeProfilingGetterCall = void (*)(
        int32_t arg0, int32_t arg1, void* arg2);

    // Software interrupt instructions are used by the simulator to call into the
    // C-based V8 runtime.
    //
    // Dispatches on the svc value encoded in |instr|:
    //  - kCallRtRedirected: call the host function described by the Redirection
    //    attached to the instruction, then resume at the saved lr.
    //  - kBreakpoint: drop into the ArmDebugger.
    //  - any value >= 1 << 23: treated as a stop code (counted if watched,
    //    handed to the debugger if enabled).
    // Any other svc value hits UNREACHABLE().
    void Simulator::SoftwareInterrupt(Instruction* instr)
    {
        int svc = instr->SvcValue();
        switch (svc) {
        case kCallRtRedirected: {
            // Check if stack is aligned. Error if not aligned is reported below to
            // include information on the function called.
            bool stack_aligned = (get_register(sp)
                                     & (::v8::internal::FLAG_sim_stack_alignment - 1))
                == 0;
            Redirection* redirection = Redirection::FromInstruction(instr);
            // Gather the integer arguments: the first four come from r0-r3, the
            // remaining five are read from the five words starting at sp.
            int32_t arg0 = get_register(r0);
            int32_t arg1 = get_register(r1);
            int32_t arg2 = get_register(r2);
            int32_t arg3 = get_register(r3);
            int32_t* stack_pointer = reinterpret_cast<int32_t*>(get_register(sp));
            int32_t arg4 = stack_pointer[0];
            int32_t arg5 = stack_pointer[1];
            int32_t arg6 = stack_pointer[2];
            int32_t arg7 = stack_pointer[3];
            int32_t arg8 = stack_pointer[4];
            // Nine arguments (arg0..arg8) are collected above; keep that in sync
            // with kMaxCParameters.
            STATIC_ASSERT(kMaxCParameters == 9);

            // True for the four redirection types that take double arguments.
            bool fp_call = (redirection->type() == ExternalReference::BUILTIN_FP_FP_CALL) || (redirection->type() == ExternalReference::BUILTIN_COMPARE_CALL) || (redirection->type() == ExternalReference::BUILTIN_FP_CALL) || (redirection->type() == ExternalReference::BUILTIN_FP_INT_CALL);
            // This is dodgy but it works because the C entry stubs are never moved.
            // See comment in codegen-arm.cc and bug 1242173.
            int32_t saved_lr = get_register(lr);
            // Address of the host function to invoke; it is cast below to the
            // signature that matches the redirection type.
            intptr_t external = reinterpret_cast<intptr_t>(redirection->external_function());
            if (fp_call) {
                double dval0, dval1; // one or two double parameters
                int32_t ival; // zero or one integer parameters
                int64_t iresult = 0; // integer return value
                double dresult = 0; // double return value
                GetFpArgs(&dval0, &dval1, &ival);
                // Trace the call when tracing is on, and always when the stack is
                // misaligned so the failing CHECK below has context.
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    // Only used here to print the target address.
                    SimulatorRuntimeCall generic_target = reinterpret_cast<SimulatorRuntimeCall>(external);
                    switch (redirection->type()) {
                    case ExternalReference::BUILTIN_FP_FP_CALL:
                    case ExternalReference::BUILTIN_COMPARE_CALL:
                        PrintF("Call to host function at %p with args %f, %f",
                            reinterpret_cast<void*>(FUNCTION_ADDR(generic_target)),
                            dval0, dval1);
                        break;
                    case ExternalReference::BUILTIN_FP_CALL:
                        PrintF("Call to host function at %p with arg %f",
                            reinterpret_cast<void*>(FUNCTION_ADDR(generic_target)),
                            dval0);
                        break;
                    case ExternalReference::BUILTIN_FP_INT_CALL:
                        PrintF("Call to host function at %p with args %f, %d",
                            reinterpret_cast<void*>(FUNCTION_ADDR(generic_target)),
                            dval0, ival);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                // Invoke the host function with the signature matching the
                // redirection type and store its result back into simulated state.
                switch (redirection->type()) {
                case ExternalReference::BUILTIN_COMPARE_CALL: {
                    SimulatorRuntimeCompareCall target = reinterpret_cast<SimulatorRuntimeCompareCall>(external);
                    iresult = target(dval0, dval1);
                    // The 64-bit result is split: low word to r0, high word to r1.
                    set_register(r0, static_cast<int32_t>(iresult));
                    set_register(r1, static_cast<int32_t>(iresult >> 32));
                    break;
                }
                case ExternalReference::BUILTIN_FP_FP_CALL: {
                    SimulatorRuntimeFPFPCall target = reinterpret_cast<SimulatorRuntimeFPFPCall>(external);
                    dresult = target(dval0, dval1);
                    SetFpResult(dresult);
                    break;
                }
                case ExternalReference::BUILTIN_FP_CALL: {
                    SimulatorRuntimeFPCall target = reinterpret_cast<SimulatorRuntimeFPCall>(external);
                    dresult = target(dval0);
                    SetFpResult(dresult);
                    break;
                }
                case ExternalReference::BUILTIN_FP_INT_CALL: {
                    SimulatorRuntimeFPIntCall target = reinterpret_cast<SimulatorRuntimeFPIntCall>(external);
                    dresult = target(dval0, ival);
                    SetFpResult(dresult);
                    break;
                }
                default:
                    UNREACHABLE();
                    break;
                }
                // NOTE(review): CHECK(stack_aligned) above presumably aborts on a
                // misaligned stack, in which case only FLAG_trace_sim matters
                // here - confirm.
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    switch (redirection->type()) {
                    case ExternalReference::BUILTIN_COMPARE_CALL:
                        PrintF("Returned %08x\n", static_cast<int32_t>(iresult));
                        break;
                    case ExternalReference::BUILTIN_FP_FP_CALL:
                    case ExternalReference::BUILTIN_FP_CALL:
                    case ExternalReference::BUILTIN_FP_INT_CALL:
                        PrintF("Returned %f\n", dresult);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                }
            } else if (redirection->type() == ExternalReference::DIRECT_API_CALL) {
                // API callback taking a single argument; no result is written back.
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    PrintF("Call to host function at %p args %08x",
                        reinterpret_cast<void*>(external), arg0);
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                SimulatorRuntimeDirectApiCall target = reinterpret_cast<SimulatorRuntimeDirectApiCall>(external);
                target(arg0);
            } else if (
                redirection->type() == ExternalReference::PROFILING_API_CALL) {
                // Profiling variant of the API call: arg1 is passed through
                // Redirection::ReverseRedirection before the call (presumably to
                // recover the un-redirected callback address - confirm).
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    PrintF("Call to host function at %p args %08x %08x",
                        reinterpret_cast<void*>(external), arg0, arg1);
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                SimulatorRuntimeProfilingApiCall target = reinterpret_cast<SimulatorRuntimeProfilingApiCall>(external);
                target(arg0, Redirection::ReverseRedirection(arg1));
            } else if (
                redirection->type() == ExternalReference::DIRECT_GETTER_CALL) {
                // Accessor getter callback taking two arguments; no result is
                // written back.
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    PrintF("Call to host function at %p args %08x %08x",
                        reinterpret_cast<void*>(external), arg0, arg1);
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                SimulatorRuntimeDirectGetterCall target = reinterpret_cast<SimulatorRuntimeDirectGetterCall>(external);
                target(arg0, arg1);
            } else if (
                redirection->type() == ExternalReference::PROFILING_GETTER_CALL) {
                // Profiling variant of the getter call: arg2 is passed through
                // Redirection::ReverseRedirection before the call.
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    PrintF("Call to host function at %p args %08x %08x %08x",
                        reinterpret_cast<void*>(external), arg0, arg1, arg2);
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                SimulatorRuntimeProfilingGetterCall target = reinterpret_cast<SimulatorRuntimeProfilingGetterCall>(
                    external);
                target(arg0, arg1, Redirection::ReverseRedirection(arg2));
            } else {
                // builtin call.
                DCHECK(redirection->type() == ExternalReference::BUILTIN_CALL || redirection->type() == ExternalReference::BUILTIN_CALL_PAIR);
                SimulatorRuntimeCall target = reinterpret_cast<SimulatorRuntimeCall>(external);
                if (::v8::internal::FLAG_trace_sim || !stack_aligned) {
                    PrintF(
                        "Call to host function at %p "
                        "args %08x, %08x, %08x, %08x, %08x, %08x, %08x, %08x, %08x",
                        reinterpret_cast<void*>(FUNCTION_ADDR(target)), arg0, arg1, arg2,
                        arg3, arg4, arg5, arg6, arg7, arg8);
                    if (!stack_aligned) {
                        PrintF(" with unaligned stack %08x\n", get_register(sp));
                    }
                    PrintF("\n");
                }
                CHECK(stack_aligned);
                // All nine collected arguments are always passed; the 64-bit result
                // is split into r0 (low word) and r1 (high word).
                int64_t result = target(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8);
                int32_t lo_res = static_cast<int32_t>(result);
                int32_t hi_res = static_cast<int32_t>(result >> 32);
                if (::v8::internal::FLAG_trace_sim) {
                    PrintF("Returned %08x\n", lo_res);
                }
                set_register(r0, lo_res);
                set_register(r1, hi_res);
            }
            // Restore the lr captured before the host call and continue execution
            // from that address.
            set_register(lr, saved_lr);
            set_pc(get_register(lr));
            break;
        }
        case kBreakpoint: {
            // Hand control to the interactive debugger.
            ArmDebugger dbg(this);
            dbg.Debug();
            break;
        }
        // Stop uses all codes greater than or equal to 1 << 23.
        default: {
            if (svc >= (1 << 23)) {
                uint32_t code = svc & kStopCodeMask;
                if (isWatchedStop(code)) {
                    IncreaseStopCounter(code);
                }
                // Stop if it is enabled, otherwise go on jumping over the stop
                // and the message address.
                // NOTE(review): nothing in this function adjusts the pc for a
                // disabled stop; presumably the caller's normal instruction
                // advance covers it - confirm.
                if (isEnabledStop(code)) {
                    ArmDebugger dbg(this);
                    dbg.Stop(instr);
                }
            } else {
                // This is not a valid svc code.
                UNREACHABLE();
                break;
            }
        }
        }
    }

    float Simulator::canonicalizeNaN(float value)
    {
        // Default NaN value, see "NaN handling" in "IEEE 754 standard implementation
        // choices" of the ARM Reference Manual.
        constexpr uint32_t kDefaultNaN = 0x7FC00000u;
        if (FPSCR_default_NaN_mode_ && /*std::*/isnan(value)) {
            value = bit_cast<float>(kDefaultNaN);
        }
        return value;
    }

    // Float32 overload: substitutes the default NaN for a NaN input when
    // FPSCR_default_NaN_mode_ is set; otherwise returns |value| as is.
    Float32 Simulator::canonicalizeNaN(Float32 value)
    {
        // Default NaN value, see "NaN handling" in "IEEE 754 standard
        // implementation choices" of the ARM Reference Manual.
        constexpr Float32 kDefaultNaN = Float32::FromBits(0x7FC00000u);
        if (FPSCR_default_NaN_mode_ && value.is_nan()) {
            return kDefaultNaN;
        }
        return value;
    }

    double Simulator::canonicalizeNaN(double value)
    {
        // Default NaN value, see "NaN handling" in "IEEE 754 standard implementation
        // choices" of the ARM Reference Manual.
        constexpr uint64_t kDefaultNaN = uint64_t { 0x7FF8000000000000 };
        if (FPSCR_default_NaN_mode_ && /*std::*/isnan(value)) {
            value = bit_cast<double>(kDefaultNaN);
        }
        return value;
    }

    // Float64 overload: substitutes the default NaN for a NaN input when
    // FPSCR_default_NaN_mode_ is set; otherwise returns |value| as is.
    Float64 Simulator::canonicalizeNaN(Float64 value)
    {
        // Default NaN value, see "NaN handling" in "IEEE 754 standard
        // implementation choices" of the ARM Reference Manual.
        constexpr Float64 kDefaultNaN = Float64::FromBits(uint64_t { 0x7FF8000000000000 });
        if (FPSCR_default_NaN_mode_ && value.is_nan()) {
            return kDefaultNaN;
        }
        return value;
    }

    // Stop helper functions.
    // Returns true when bits 27..24 of |instr| are all set and its svc value is
    // at least kStopCode.
    bool Simulator::isStopInstruction(Instruction* instr)
    {
        if (instr->Bits(27, 24) != 0xF) {
            return false;
        }
        return instr->SvcValue() >= kStopCode;
    }

    // Returns true when |code| is below kNumOfWatchedStops, i.e. it has an entry
    // in watched_stops_.
    bool Simulator::isWatchedStop(uint32_t code)
    {
        DCHECK_LE(code, kMaxStopCode);
        const bool has_watch_slot = code < kNumOfWatchedStops;
        return has_watch_slot;
    }

    // Returns true when the stop |code| is enabled: either it is not watched at
    // all, or its kStopDisabledBit is clear in the stored count.
    bool Simulator::isEnabledStop(uint32_t code)
    {
        DCHECK_LE(code, kMaxStopCode);
        // Unwatched stops are always enabled.
        if (!isWatchedStop(code)) {
            return true;
        }
        return (watched_stops_[code].count & kStopDisabledBit) == 0;
    }

    // Clears kStopDisabledBit for the watched stop |code| if it is currently
    // disabled.
    void Simulator::EnableStop(uint32_t code)
    {
        DCHECK(isWatchedStop(code));
        if (isEnabledStop(code)) {
            return; // Already enabled.
        }
        watched_stops_[code].count &= ~kStopDisabledBit;
    }

    // Sets kStopDisabledBit for the watched stop |code| if it is currently
    // enabled.
    void Simulator::DisableStop(uint32_t code)
    {
        DCHECK(isWatchedStop(code));
        if (!isEnabledStop(code)) {
            return; // Already disabled.
        }
        watched_stops_[code].count |= kStopDisabledBit;
    }

    void Simulator::IncreaseStopCounter(uint32_t code)
    {
        DCHECK_LE(code, kMaxStopCode);
        DCHECK(isWatchedStop(code));
        if ((watched_stops_[code].count & ~(1 << 31)) == 0x7FFFFFFF) {
            PrintF("Stop counter for code %i has overflowed.\n"
                   "Enabling this code and reseting the counter to 0.\n",
                code);
            watched_stops_[code].count = 0;
            EnableStop(code);
        } else {
            watched_stops_[code].count++;
        }
    }

    // Prints the status of the stop |code|: a fixed message for unwatched
    // stops, otherwise its enabled/disabled state, hit count and (when present)
    // description. Watched stops with a zero count print nothing.
    void Simulator::PrintStopInfo(uint32_t code)
    {
        DCHECK_LE(code, kMaxStopCode);
        if (!isWatchedStop(code)) {
            PrintF("Stop not watched.");
            return;
        }

        const char* status_text = isEnabledStop(code) ? "Enabled" : "Disabled";
        // Strip the disabled flag so only the hit count remains.
        int32_t hits = watched_stops_[code].count & ~kStopDisabledBit;
        // Don't print the state of unused breakpoints.
        if (hits == 0) {
            return;
        }

        if (watched_stops_[code].desc) {
            PrintF("stop %i - 0x%x: \t%s, \tcounter = %i, \t%s\n",
                code, code, status_text, hits, watched_stops_[code].desc);
        } else {
            PrintF("stop %i - 0x%x: \t%s, \tcounter = %i\n",
                code, code, status_text, hits);
        }
    }

    // Handle execution based on instruction types.

    // Instruction types 0 and 1 are both rolled into one function because they
    // only differ in the handling of the shifter_operand.
    void Simulator::DecodeType01(Instruction* instr)
    {
        int type = instr->TypeValue();
        if ((type == 0) && instr->IsSpecialType0()) {
            // multiply instruction or extra loads and stores
            if (instr->Bits(7, 4) == 9) {
                if (instr->Bit(24) == 0) {
                    // Raw field decoding here. Multiply instructions have their Rd in
                    // funny places.
                    int rn = instr->RnValue();
                    int rm = instr->RmValue();
                    int rs = instr->RsValue();
                    int32_t rs_val = get_register(rs);
                    int32_t rm_val = get_register(rm);
                    if (instr->Bit(23) == 0) {
                        if (instr->Bit(21) == 0) {
                            // The MUL instruction description (A 4.1.33) refers to Rd as being
                            // the destination for the operation, but it confusingly uses the
                            // Rn field to encode it.
                            // Format(instr, "mul'cond's 'rn, 'rm, 'rs");
                            int rd = rn; // Remap the rn field to the Rd register.
                            int32_t alu_out = rm_val * rs_val;
                            set_register(rd, alu_out);
                            if (instr->HasS()) {
                                SetNZFlags(alu_out);
                            }
                        } else {
                            int rd = instr->RdValue();
                            int32_t acc_value = get_register(rd);
                            if (instr->Bit(22) == 0) {
                                // The MLA instruction description (A 4.1.28) refers to the order
                                // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
                                // Rn field to encode the Rd register and the Rd field to encode
                                // the Rn register.
                                // Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
                                int32_t mul_out = rm_val * rs_val;
                                int32_t result = acc_value + mul_out;
                                set_register(rn, result);
                            } else {
                                // Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
                                int32_t mul_out = rm_val * rs_val;
                                int32_t result = acc_value - mul_out;
                                set_register(rn, result);
                            }
                        }
                    } else {
                        // The signed/long multiply instructions use the terms RdHi and RdLo
                        // when referring to the target registers. They are mapped to the Rn
                        // and Rd fields as follows:
                        // RdLo == Rd
                        // RdHi == Rn (This is confusingly stored in variable rd here
                        //             because the mul instruction from above uses the
                        //             Rn field to encode the Rd register. Good luck figuring
                        //             this out without reading the ARM instruction manual
                        //             at a very detailed level.)
                        // Format(instr, "'um'al'cond's 'rd, 'rn, 'rs, 'rm");
                        int rd_hi = rn; // Remap the rn field to the RdHi register.
                        int rd_lo = instr->RdValue();
                        int32_t hi_res = 0;
                        int32_t lo_res = 0;
                        if (instr->Bit(22) == 1) {
                            int64_t left_op = static_cast<int32_t>(rm_val);
                            int64_t right_op = static_cast<int32_t>(rs_val);
                            uint64_t result = left_op * right_op;
                            hi_res = static_cast<int32_t>(result >> 32);
                            lo_res = static_cast<int32_t>(result & 0xFFFFFFFF);
                        } else {
                            // unsigned multiply
                            uint64_t left_op = static_cast<uint32_t>(rm_val);
                            uint64_t right_op = static_cast<uint32_t>(rs_val);
                            uint64_t result = left_op * right_op;
                            hi_res = static_cast<int32_t>(result >> 32);
                            lo_res = static_cast<int32_t>(result & 0xFFFFFFFF);
                        }
                        set_register(rd_lo, lo_res);
                        set_register(rd_hi, hi_res);
                        if (instr->HasS()) {
                            UNIMPLEMENTED();
                        }
                    }
                } else {
                    if (instr->Bits(24, 23) == 3) {
                        if (instr->Bit(20) == 1) {
                            // ldrex
                            int rt = instr->RtValue();
                            int rn = instr->RnValue();
                            int32_t addr = get_register(rn);
                            switch (instr->Bits(22, 21)) {
                            case 0: {
                                // Format(instr, "ldrex'cond 'rt, ['rn]");
                                int value = ReadExW(addr);
                                set_register(rt, value);
                                break;
                            }
                            case 1: {
                                // Format(instr, "ldrexd'cond 'rt, ['rn]");
                                int* rn_data = ReadExDW(addr);
                                set_dw_register(rt, rn_data);
                                break;
                            }
                            case 2: {
                                // Format(instr, "ldrexb'cond 'rt, ['rn]");
                                uint8_t value = ReadExBU(addr);
                                set_register(rt, value);
                                break;
                            }
                            case 3: {
                                // Format(instr, "ldrexh'cond 'rt, ['rn]");
                                uint16_t value = ReadExHU(addr);
                                set_register(rt, value);
                                break;
                            }
                            default:
                                UNREACHABLE();
                                break;
                            }
                        } else {
                            // The instruction is documented as strex rd, rt, [rn], but the
                            // "rt" register is using the rm bits.
                            int rd = instr->RdValue();
                            int rt = instr->RmValue();
                            int rn = instr->RnValue();
                            DCHECK_NE(rd, rn);
                            DCHECK_NE(rd, rt);
                            int32_t addr = get_register(rn);
                            switch (instr->Bits(22, 21)) {
                            case 0: {
                                // Format(instr, "strex'cond 'rd, 'rm, ['rn]");
                                int value = get_register(rt);
                                int status = WriteExW(addr, value);
                                set_register(rd, status);
                                break;
                            }
                            case 1: {
                                // Format(instr, "strexd'cond 'rd, 'rm, ['rn]");
                                DCHECK_EQ(rt % 2, 0);
                                int32_t value1 = get_register(rt);
                                int32_t value2 = get_register(rt + 1);
                                int status = WriteExDW(addr, value1, value2);
                                set_register(rd, status);
                                break;
                            }
                            case 2: {
                                // Format(instr, "strexb'cond 'rd, 'rm, ['rn]");
                                uint8_t value = get_register(rt);
                                int status = WriteExB(addr, value);
                                set_register(rd, status);
                                break;
                            }
                            case 3: {
                                // Format(instr, "strexh'cond 'rd, 'rm, ['rn]");
                                uint16_t value = get_register(rt);
                                int status = WriteExH(addr, value);
                                set_register(rd, status);
                                break;
                            }
                            default:
                                UNREACHABLE();
                                break;
                            }
                        }
                    } else {
                        UNIMPLEMENTED(); // Not used by V8.
                    }
                }
            } else {
                // extra load/store instructions
                int rd = instr->RdValue();
                int rn = instr->RnValue();
                int32_t rn_val = get_register(rn);
                int32_t addr = 0;
                if (instr->Bit(22) == 0) {
                    int rm = instr->RmValue();
                    int32_t rm_val = get_register(rm);
                    switch (instr->PUField()) {
                    case da_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn], -'rm");
                        DCHECK(!instr->HasW());
                        addr = rn_val;
                        rn_val -= rm_val;
                        set_register(rn, rn_val);
                        break;
                    }
                    case ia_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn], +'rm");
                        DCHECK(!instr->HasW());
                        addr = rn_val;
                        rn_val += rm_val;
                        set_register(rn, rn_val);
                        break;
                    }
                    case db_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn, -'rm]'w");
                        rn_val -= rm_val;
                        addr = rn_val;
                        if (instr->HasW()) {
                            set_register(rn, rn_val);
                        }
                        break;
                    }
                    case ib_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn, +'rm]'w");
                        rn_val += rm_val;
                        addr = rn_val;
                        if (instr->HasW()) {
                            set_register(rn, rn_val);
                        }
                        break;
                    }
                    default: {
                        // The PU field is a 2-bit field.
                        UNREACHABLE();
                        break;
                    }
                    }
                } else {
                    int32_t imm_val = (instr->ImmedHValue() << 4) | instr->ImmedLValue();
                    switch (instr->PUField()) {
                    case da_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn], #-'off8");
                        DCHECK(!instr->HasW());
                        addr = rn_val;
                        rn_val -= imm_val;
                        set_register(rn, rn_val);
                        break;
                    }
                    case ia_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn], #+'off8");
                        DCHECK(!instr->HasW());
                        addr = rn_val;
                        rn_val += imm_val;
                        set_register(rn, rn_val);
                        break;
                    }
                    case db_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn, #-'off8]'w");
                        rn_val -= imm_val;
                        addr = rn_val;
                        if (instr->HasW()) {
                            set_register(rn, rn_val);
                        }
                        break;
                    }
                    case ib_x: {
                        // Format(instr, "'memop'cond'sign'h 'rd, ['rn, #+'off8]'w");
                        rn_val += imm_val;
                        addr = rn_val;
                        if (instr->HasW()) {
                            set_register(rn, rn_val);
                        }
                        break;
                    }
                    default: {
                        // The PU field is a 2-bit field.
                        UNREACHABLE();
                        break;
                    }
                    }
                }
                if (((instr->Bits(7, 4) & 0xD) == 0xD) && (instr->Bit(20) == 0)) {
                    DCHECK_EQ(rd % 2, 0);
                    if (instr->HasH()) {
                        // The strd instruction.
                        int32_t value1 = get_register(rd);
                        int32_t value2 = get_register(rd + 1);
                        WriteDW(addr, value1, value2);
                    } else {
                        // The ldrd instruction.
                        int* rn_data = ReadDW(addr);
                        set_dw_register(rd, rn_data);
                    }
                } else if (instr->HasH()) {
                    if (instr->HasSign()) {
                        if (instr->HasL()) {
                            int16_t val = ReadH(addr);
                            set_register(rd, val);
                        } else {
                            int16_t val = get_register(rd);
                            WriteH(addr, val);
                        }
                    } else {
                        if (instr->HasL()) {
                            uint16_t val = ReadHU(addr);
                            set_register(rd, val);
                        } else {
                            uint16_t val = get_register(rd);
                            WriteH(addr, val);
                        }
                    }
                } else {
                    // signed byte loads
                    DCHECK(instr->HasSign());
                    DCHECK(instr->HasL());
                    int8_t val = ReadB(addr);
                    set_register(rd, val);
                }
                return;
            }
        } else if ((type == 0) && instr->IsMiscType0()) {
            if ((instr->Bits(27, 23) == 2) && (instr->Bits(21, 20) == 2) && (instr->Bits(15, 4) == 0xF00)) {
                // MSR
                int rm = instr->RmValue();
                DCHECK_NE(pc, rm); // UNPREDICTABLE
                SRegisterFieldMask sreg_and_mask = instr->BitField(22, 22) | instr->BitField(19, 16);
                SetSpecialRegister(sreg_and_mask, get_register(rm));
            } else if ((instr->Bits(27, 23) == 2) && (instr->Bits(21, 20) == 0) && (instr->Bits(11, 0) == 0)) {
                // MRS
                int rd = instr->RdValue();
                DCHECK_NE(pc, rd); // UNPREDICTABLE
                SRegister sreg = static_cast<SRegister>(instr->BitField(22, 22));
                set_register(rd, GetFromSpecialRegister(sreg));
            } else if (instr->Bits(22, 21) == 1) {
                int rm = instr->RmValue();
                switch (instr->BitField(7, 4)) {
                case BX:
                    set_pc(get_register(rm));
                    break;
                case BLX: {
                    uint32_t old_pc = get_pc();
                    set_pc(get_register(rm));
                    set_register(lr, old_pc + kInstrSize);
                    break;
                }
                case BKPT: {
                    ArmDebugger dbg(this);
                    PrintF("Simulator hit BKPT.\n");
                    dbg.Debug();
                    break;
                }
                default:
                    UNIMPLEMENTED();
                }
            } else if (instr->Bits(22, 21) == 3) {
                int rm = instr->RmValue();
                int rd = instr->RdValue();
                switch (instr->BitField(7, 4)) {
                case CLZ: {
                    uint32_t bits = get_register(rm);
                    int leading_zeros = 0;
                    if (bits == 0) {
                        leading_zeros = 32;
                    } else {
                        while ((bits & 0x80000000u) == 0) {
                            bits <<= 1;
                            leading_zeros++;
                        }
                    }
                    set_register(rd, leading_zeros);
                    break;
                }
                default:
                    UNIMPLEMENTED();
                }
            } else {
                PrintF("%08x\n", instr->InstructionBits());
                UNIMPLEMENTED();
            }
        } else if ((type == 1) && instr->IsNopLikeType1()) {
            if (instr->BitField(7, 0) == 0) {
                // NOP.
            } else if (instr->BitField(7, 0) == 20) {
                // CSDB.
            } else {
                PrintF("%08x\n", instr->InstructionBits());
                UNIMPLEMENTED();
            }
        } else {
            int rd = instr->RdValue();
            int rn = instr->RnValue();
            int32_t rn_val = get_register(rn);
            int32_t shifter_operand = 0;
            bool shifter_carry_out = 0;
            if (type == 0) {
                shifter_operand = GetShiftRm(instr, &shifter_carry_out);
            } else {
                DCHECK_EQ(instr->TypeValue(), 1);
                shifter_operand = GetImm(instr, &shifter_carry_out);
            }
            int32_t alu_out;

            switch (instr->OpcodeField()) {
            case AND: {
                // Format(instr, "and'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "and'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val & shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            case EOR: {
                // Format(instr, "eor'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "eor'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val ^ shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            case SUB: {
                // Format(instr, "sub'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "sub'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val - shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(!BorrowFrom(rn_val, shifter_operand));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, false));
                }
                break;
            }

            case RSB: {
                // Format(instr, "rsb'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "rsb'cond's 'rd, 'rn, 'imm");
                alu_out = shifter_operand - rn_val;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(!BorrowFrom(shifter_operand, rn_val));
                    SetVFlag(OverflowFrom(alu_out, shifter_operand, rn_val, false));
                }
                break;
            }

            case ADD: {
                // Format(instr, "add'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "add'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val + shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(CarryFrom(rn_val, shifter_operand));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, true));
                }
                break;
            }

            case ADC: {
                // Format(instr, "adc'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "adc'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val + shifter_operand + GetCarry();
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(CarryFrom(rn_val, shifter_operand, GetCarry()));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, true));
                }
                break;
            }

            case SBC: {
                //        Format(instr, "sbc'cond's 'rd, 'rn, 'shift_rm");
                //        Format(instr, "sbc'cond's 'rd, 'rn, 'imm");
                alu_out = (rn_val - shifter_operand) - (GetCarry() ? 0 : 1);
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(!BorrowFrom(rn_val, shifter_operand, GetCarry()));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, false));
                }
                break;
            }

            case RSC: {
                Format(instr, "rsc'cond's 'rd, 'rn, 'shift_rm");
                Format(instr, "rsc'cond's 'rd, 'rn, 'imm");
                break;
            }

            case TST: {
                if (instr->HasS()) {
                    // Format(instr, "tst'cond 'rn, 'shift_rm");
                    // Format(instr, "tst'cond 'rn, 'imm");
                    alu_out = rn_val & shifter_operand;
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                } else {
                    // Format(instr, "movw'cond 'rd, 'imm").
                    alu_out = instr->ImmedMovwMovtValue();
                    set_register(rd, alu_out);
                }
                break;
            }

            case TEQ: {
                if (instr->HasS()) {
                    // Format(instr, "teq'cond 'rn, 'shift_rm");
                    // Format(instr, "teq'cond 'rn, 'imm");
                    alu_out = rn_val ^ shifter_operand;
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                } else {
                    // Other instructions matching this pattern are handled in the
                    // miscellaneous instructions part above.
                    UNREACHABLE();
                }
                break;
            }

            case CMP: {
                if (instr->HasS()) {
                    // Format(instr, "cmp'cond 'rn, 'shift_rm");
                    // Format(instr, "cmp'cond 'rn, 'imm");
                    alu_out = rn_val - shifter_operand;
                    SetNZFlags(alu_out);
                    SetCFlag(!BorrowFrom(rn_val, shifter_operand));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, false));
                } else {
                    // Format(instr, "movt'cond 'rd, 'imm").
                    alu_out = (get_register(rd) & 0xFFFF) | (instr->ImmedMovwMovtValue() << 16);
                    set_register(rd, alu_out);
                }
                break;
            }

            case CMN: {
                if (instr->HasS()) {
                    // Format(instr, "cmn'cond 'rn, 'shift_rm");
                    // Format(instr, "cmn'cond 'rn, 'imm");
                    alu_out = rn_val + shifter_operand;
                    SetNZFlags(alu_out);
                    SetCFlag(CarryFrom(rn_val, shifter_operand));
                    SetVFlag(OverflowFrom(alu_out, rn_val, shifter_operand, true));
                } else {
                    // Other instructions matching this pattern are handled in the
                    // miscellaneous instructions part above.
                    UNREACHABLE();
                }
                break;
            }

            case ORR: {
                // Format(instr, "orr'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "orr'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val | shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            case MOV: {
                // Format(instr, "mov'cond's 'rd, 'shift_rm");
                // Format(instr, "mov'cond's 'rd, 'imm");
                alu_out = shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            case BIC: {
                // Format(instr, "bic'cond's 'rd, 'rn, 'shift_rm");
                // Format(instr, "bic'cond's 'rd, 'rn, 'imm");
                alu_out = rn_val & ~shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            case MVN: {
                // Format(instr, "mvn'cond's 'rd, 'shift_rm");
                // Format(instr, "mvn'cond's 'rd, 'imm");
                alu_out = ~shifter_operand;
                set_register(rd, alu_out);
                if (instr->HasS()) {
                    SetNZFlags(alu_out);
                    SetCFlag(shifter_carry_out);
                }
                break;
            }

            default: {
                UNREACHABLE();
                break;
            }
            }
        }
    }

    // Executes a type 2 instruction: a word or byte load/store whose offset is
    // the 12-bit immediate encoded in the instruction.
    //
    // The P/U field selects the addressing mode:
    //   da_x / ia_x  - the access uses the unmodified base register, which is
    //                  then always updated to base -/+ offset.
    //   db_x / ib_x  - the access uses base -/+ offset; the base register is
    //                  only updated when the W bit is set.
    // Any base register update is performed before the memory transfer.
    void Simulator::DecodeType2(Instruction* instr)
    {
        const int dst_reg = instr->RdValue();
        const int base_reg = instr->RnValue();
        const int32_t base = get_register(base_reg);
        const int32_t offset = instr->Offset12Value();
        int32_t address = 0;

        switch (instr->PUField()) {
        case da_x:
            // Format(instr, "'memop'cond'b 'rd, ['rn], #-'off12");
            DCHECK(!instr->HasW());
            address = base;
            set_register(base_reg, base - offset);
            break;
        case ia_x:
            // Format(instr, "'memop'cond'b 'rd, ['rn], #+'off12");
            DCHECK(!instr->HasW());
            address = base;
            set_register(base_reg, base + offset);
            break;
        case db_x:
            // Format(instr, "'memop'cond'b 'rd, ['rn, #-'off12]'w");
            address = base - offset;
            if (instr->HasW()) {
                set_register(base_reg, address);
            }
            break;
        case ib_x:
            // Format(instr, "'memop'cond'b 'rd, ['rn, #+'off12]'w");
            address = base + offset;
            if (instr->HasW()) {
                set_register(base_reg, address);
            }
            break;
        default:
            UNREACHABLE();
            break;
        }

        // Perform the transfer itself. The destination register is read only
        // now, i.e. after any base register update above.
        if (!instr->HasB()) {
            // Word access.
            if (instr->HasL()) {
                set_register(dst_reg, ReadW(address));
            } else {
                WriteW(address, get_register(dst_reg));
            }
            return;
        }

        // Byte access: loads go through ReadBU, stores write the low byte of
        // the register.
        if (instr->HasL()) {
            const byte loaded = ReadBU(address);
            set_register(dst_reg, loaded);
        } else {
            const byte stored = get_register(dst_reg);
            WriteB(address, stored);
        }
    }

    // Rotates |value| right by 8 * |rotate| bits, where |rotate| is the two-bit
    // rotation field (bits 11:10) of the extend instructions handled in
    // DecodeType3. The rotation is done on an unsigned value so that no sign
    // bits are smeared into the rotated result.
    static uint32_t RotateRightByBytes(uint32_t value, int32_t rotate)
    {
        switch (rotate) {
        case 1:
            return (value >> 8) | (value << 24);
        case 2:
            return (value >> 16) | (value << 16);
        case 3:
            return (value >> 24) | (value << 8);
        default:
            return value;
        }
    }

    // Executes a type 3 instruction. Depending on the P/U field and a few
    // discriminating bits this is either
    //   - a word/byte load or store with a (shifted) register offset, or
    //   - one of the media/arithmetic instructions sharing this encoding space:
    //     pkhbt, pkhtb, usat, sxtb, sxtab, sxth, sxtah, rev, uxtb16, uxtb,
    //     uxtab, uxth, uxtah, rbit, smmul, smmla, sdiv, udiv, ubfx, sbfx, bfc
    //     and bfi.
    // The non-memory instructions write their result and return early; the
    // memory forms only compute |addr| (and write back the base register where
    // requested) and share the transfer code at the end of the function.
    void Simulator::DecodeType3(Instruction* instr)
    {
        int rd = instr->RdValue();
        int rn = instr->RnValue();
        int32_t rn_val = get_register(rn);
        bool shifter_carry_out = 0;
        int32_t shifter_operand = GetShiftRm(instr, &shifter_carry_out);
        int32_t addr = 0;
        switch (instr->PUField()) {
        case da_x: {
            DCHECK(!instr->HasW());
            Format(instr, "'memop'cond'b 'rd, ['rn], -'shift_rm");
            UNIMPLEMENTED();
            break;
        }
        case ia_x: {
            if (instr->Bit(4) == 0) {
                // Memop.
                // Format(instr, "'memop'cond'b 'rd, ['rn], +'shift_rm");
                // Post-indexed: access the unmodified base address, then
                // write base + offset back (same scheme as the immediate
                // form in DecodeType2). Previously |addr| was left at 0.
                DCHECK(!instr->HasW());
                addr = rn_val;
                rn_val += shifter_operand;
                set_register(rn, rn_val);
            } else {
                if (instr->Bit(5) == 0) {
                    switch (instr->Bits(22, 21)) {
                    case 0:
                        if (instr->Bit(20) == 0) {
                            const uint32_t rn_bits = static_cast<uint32_t>(rn_val);
                            const int32_t shift = instr->Bits(11, 7);
                            if (instr->Bit(6) == 0) {
                                // Pkhbt: low half from rn, high half from
                                // (rm LSL shift).
                                const uint32_t rm_bits = static_cast<uint32_t>(get_register(instr->RmValue())) << shift;
                                set_register(rd, (rn_bits & 0xFFFF) | (rm_bits & 0xFFFF0000U));
                            } else {
                                // Pkhtb: high half from rn, low half from
                                // (rm ASR shift). A shift field of 0 encodes
                                // ASR #32; shifting a 32-bit value by 32 is
                                // undefined in C++, and ASR #31 yields the same
                                // all-sign-bits result.
                                const int32_t rm_val = get_register(instr->RmValue()) >> ((shift == 0) ? 31 : shift);
                                set_register(rd, (rn_bits & 0xFFFF0000U) | (static_cast<uint32_t>(rm_val) & 0xFFFF));
                            }
                        } else {
                            UNIMPLEMENTED();
                        }
                        break;
                    case 1:
                        UNIMPLEMENTED();
                        break;
                    case 2:
                        UNIMPLEMENTED();
                        break;
                    case 3: {
                        // Usat.
                        const int32_t sat_pos = instr->Bits(20, 16);
                        // Computed unsigned so that sat_pos == 31 does not
                        // overflow a signed shift.
                        const int32_t sat_val = static_cast<int32_t>((1u << sat_pos) - 1);
                        const int32_t shift = instr->Bits(11, 7);
                        const int32_t shift_type = instr->Bit(6);
                        int32_t rm_val = get_register(instr->RmValue());
                        if (shift_type == 0) { // LSL
                            // Shift as unsigned: left-shifting a negative
                            // signed value is undefined before C++20.
                            rm_val = bit_cast<int32_t>(bit_cast<uint32_t>(rm_val) << shift);
                        } else { // ASR
                            // A shift field of 0 encodes ASR #32, which gives
                            // the same result as ASR #31.
                            rm_val >>= (shift == 0) ? 31 : shift;
                        }
                        // If saturation occurs, the Q flag should be set in the CPSR.
                        // There is no Q flag yet, and no instruction (MRS) to read the
                        // CPSR directly.
                        if (rm_val > sat_val) {
                            rm_val = sat_val;
                        } else if (rm_val < 0) {
                            rm_val = 0;
                        }
                        set_register(rd, rm_val);
                        break;
                    }
                    }
                } else {
                    switch (instr->Bits(22, 21)) {
                    case 0:
                        UNIMPLEMENTED();
                        break;
                    case 1:
                        if (instr->Bits(9, 6) == 1) {
                            // Sxtb / Sxtab (bit 20 clear) and Sxth / Sxtah
                            // (bit 20 set). Rn == 0xF selects the plain
                            // extend, anything else the accumulating form.
                            const uint32_t rotated = RotateRightByBytes(
                                static_cast<uint32_t>(get_register(instr->RmValue())), instr->Bits(11, 10));
                            int32_t extended;
                            if (instr->Bit(20) == 0) {
                                extended = static_cast<int8_t>(rotated);
                            } else {
                                extended = static_cast<int16_t>(rotated);
                            }
                            if (instr->Bits(19, 16) == 0xF) {
                                set_register(rd, extended);
                            } else {
                                set_register(rd, rn_val + extended);
                            }
                        } else if (instr->Bits(27, 16) == 0x6BF && instr->Bits(11, 4) == 0xF3) {
                            // Rev.
                            uint32_t rm_val = get_register(instr->RmValue());
                            set_register(rd, ByteReverse(rm_val));
                        } else {
                            UNREACHABLE();
                        }
                        break;
                    case 2:
                        if ((instr->Bit(20) == 0) && (instr->Bits(9, 6) == 1)) {
                            if (instr->Bits(19, 16) == 0xF) {
                                // Uxtb16.
                                const uint32_t rotated = RotateRightByBytes(
                                    static_cast<uint32_t>(get_register(instr->RmValue())), instr->Bits(11, 10));
                                set_register(rd, (rotated & 0xFF) | (rotated & 0xFF0000));
                            } else {
                                UNIMPLEMENTED();
                            }
                        } else {
                            UNIMPLEMENTED();
                        }
                        break;
                    case 3:
                        if ((instr->Bits(9, 6) == 1)) {
                            // Uxtb / Uxtab (bit 20 clear) and Uxth / Uxtah
                            // (bit 20 set). Rn == 0xF selects the plain
                            // extend, anything else the accumulating form.
                            const uint32_t rotated = RotateRightByBytes(
                                static_cast<uint32_t>(get_register(instr->RmValue())), instr->Bits(11, 10));
                            const uint32_t mask = (instr->Bit(20) == 0) ? 0xFFu : 0xFFFFu;
                            uint32_t result = rotated & mask;
                            if (instr->Bits(19, 16) != 0xF) {
                                result += static_cast<uint32_t>(rn_val);
                            }
                            set_register(rd, result);
                        } else {
                            // PU == 0b01, BW == 0b11, Bits(9, 6) != 0b0001
                            if ((instr->Bits(20, 16) == 0x1F) && (instr->Bits(11, 4) == 0xF3)) {
                                // Rbit.
                                uint32_t rm_val = get_register(instr->RmValue());
                                set_register(rd, base::bits::ReverseBits(rm_val));
                            } else {
                                UNIMPLEMENTED();
                            }
                        }
                        break;
                    }
                }
                return;
            }
            break;
        }
        case db_x: {
            if ((instr->Bits(22, 20) == 0x5) && (instr->Bits(7, 4) == 0x1)) {
                int rm = instr->RmValue();
                int32_t rm_val = get_register(rm);
                int rs = instr->RsValue();
                int32_t rs_val = get_register(rs);
                if (instr->Bits(15, 12) == 0xF) {
                    // SMMUL (in V8 notation matching ARM ISA format)
                    // Format(instr, "smmul'cond 'rn, 'rm, 'rs");
                    rn_val = base::bits::SignedMulHigh32(rm_val, rs_val);
                } else {
                    // SMMLA (in V8 notation matching ARM ISA format)
                    // Format(instr, "smmla'cond 'rn, 'rm, 'rs, 'rd");
                    int32_t rd_val = get_register(rd);
                    rn_val = base::bits::SignedMulHighAndAdd32(rm_val, rs_val, rd_val);
                }
                set_register(rn, rn_val);
                return;
            }
            if ((instr->Bits(5, 4) == 0x1) && (instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
                // (s/u)div (in V8 notation matching ARM ISA format) rn = rm/rs
                // Format(instr, "'(s/u)div'cond'b 'rn, 'rm, 'rs);
                int rm = instr->RmValue();
                int32_t rm_val = get_register(rm);
                int rs = instr->RsValue();
                int32_t rs_val = get_register(rs);
                int32_t ret_val = 0;
                // udiv
                if (instr->Bit(21) == 0x1) {
                    ret_val = bit_cast<int32_t>(base::bits::UnsignedDiv32(
                        bit_cast<uint32_t>(rm_val), bit_cast<uint32_t>(rs_val)));
                } else {
                    ret_val = base::bits::SignedDiv32(rm_val, rs_val);
                }
                set_register(rn, ret_val);
                return;
            }
            // Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
            addr = rn_val - shifter_operand;
            if (instr->HasW()) {
                set_register(rn, addr);
            }
            break;
        }
        case ib_x: {
            if (instr->HasW() && (instr->Bits(6, 4) == 0x5)) {
                uint32_t widthminus1 = static_cast<uint32_t>(instr->Bits(20, 16));
                uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
                uint32_t msbit = widthminus1 + lsbit;
                if (msbit <= 31) {
                    // Move the field's top bit to bit 31, then shift right
                    // so that the field ends up at bit 0.
                    const uint32_t rm_val = static_cast<uint32_t>(get_register(instr->RmValue()));
                    const uint32_t aligned = rm_val << (31 - msbit);
                    if (instr->Bit(22)) {
                        // ubfx - unsigned bitfield extract.
                        set_register(instr->RdValue(), aligned >> (31 - widthminus1));
                    } else {
                        // sbfx - signed bitfield extract. The left shift is
                        // done unsigned (signed left shifts of negative values
                        // are undefined before C++20); the right shift is
                        // signed to replicate the field's top bit.
                        int32_t extr_val = bit_cast<int32_t>(aligned);
                        extr_val = extr_val >> (31 - widthminus1);
                        set_register(instr->RdValue(), extr_val);
                    }
                } else {
                    UNREACHABLE();
                }
                return;
            } else if (!instr->HasW() && (instr->Bits(6, 4) == 0x1)) {
                uint32_t lsbit = static_cast<uint32_t>(instr->Bits(11, 7));
                uint32_t msbit = static_cast<uint32_t>(instr->Bits(20, 16));
                if (msbit >= lsbit) {
                    // bfc or bfi - bitfield clear/insert.
                    uint32_t rd_val = static_cast<uint32_t>(get_register(instr->RdValue()));
                    uint32_t bitcount = msbit - lsbit + 1;
                    // bitcount is in [1, 32], so the shift count is in [0, 31].
                    uint32_t mask = 0xFFFFFFFFu >> (32 - bitcount);
                    rd_val &= ~(mask << lsbit);
                    if (instr->RmValue() != 15) {
                        // bfi - bitfield insert.
                        uint32_t rm_val = static_cast<uint32_t>(get_register(instr->RmValue()));
                        rm_val &= mask;
                        rd_val |= rm_val << lsbit;
                    }
                    set_register(instr->RdValue(), rd_val);
                } else {
                    UNREACHABLE();
                }
                return;
            } else {
                // Format(instr, "'memop'cond'b 'rd, ['rn, +'shift_rm]'w");
                addr = rn_val + shifter_operand;
                if (instr->HasW()) {
                    set_register(rn, addr);
                }
            }
            break;
        }
        default: {
            UNREACHABLE();
            break;
        }
        }
        // Shared transfer for the memory forms; |addr| was computed above.
        if (instr->HasB()) {
            if (instr->HasL()) {
                // Unsigned byte load, as in DecodeType2.
                uint8_t value = ReadBU(addr);
                set_register(rd, value);
            } else {
                uint8_t value = get_register(rd);
                WriteB(addr, value);
            }
        } else {
            if (instr->HasL()) {
                set_register(rd, ReadW(addr));
            } else {
                WriteW(addr, get_register(rd));
            }
        }
    }

    // Decodes a type 4 (block data transfer) instruction. The L bit selects
    // between load-multiple and store-multiple; the transfer itself is delegated
    // to HandleRList.
    void Simulator::DecodeType4(Instruction* instr)
    {
        DCHECK_EQ(instr->Bit(22), 0); // only allowed to be set in privileged mode
        // Load:  Format(instr, "ldm'cond'pu 'rn'w, 'rlist");
        // Store: Format(instr, "stm'cond'pu 'rn'w, 'rlist");
        const bool is_load = instr->HasL();
        HandleRList(instr, is_load);
    }

    // Decodes a type 5 (branch / branch-with-link) instruction: optionally
    // stores get_pc() + kInstrSize in lr, then adds the 24-bit signed immediate
    // (scaled by 4) to the value read from the pc register.
    void Simulator::DecodeType5(Instruction* instr)
    {
        // Format(instr, "b'l'cond 'target");
        const int branch_offset = (instr->SImmed24Value() << 2);
        const intptr_t current_pc = get_pc();
        if (instr->HasLink()) {
            // bl: record the return address before the pc is overwritten.
            set_register(lr, current_pc + kInstrSize);
        }
        const int branch_base = get_register(pc);
        set_pc(branch_base + branch_offset);
    }

    // Decodes a type 6 instruction by forwarding it unchanged to
    // DecodeType6CoprocessorIns.
    void Simulator::DecodeType6(Instruction* instr)
    {
        DecodeType6CoprocessorIns(instr);
    }

    // Decodes a type 7 instruction. Bit 24 set routes to SoftwareInterrupt;
    // otherwise the coprocessor number selects the VFP decoder (10 and 11) or
    // the CP15 decoder (15). Every other coprocessor is unimplemented.
    void Simulator::DecodeType7(Instruction* instr)
    {
        if (instr->Bit(24) == 1) {
            SoftwareInterrupt(instr);
            return;
        }

        const int coprocessor = instr->CoprocessorValue();
        if ((coprocessor == 10) || (coprocessor == 11)) {
            DecodeTypeVFP(instr);
        } else if (coprocessor == 15) {
            DecodeTypeCP15(instr);
        } else {
            UNIMPLEMENTED();
        }
    }

    // void Simulator::DecodeTypeVFP(Instruction* instr)
    // The following ARMv7 VFP/NEON instructions are currently supported.
    // vmov :Sn = Rt
    // vmov :Rt = Sn
    // vmov :Dd = Dm, Sd = Sm (register to register)
    // vmov :Dd = #imm, Sd = #imm
    // vmov :Dn[x] = Rt (ARM core register to scalar)
    // vmov :Rt = Dn[x] (scalar to ARM core register)
    // vcvt: Dd = Sm
    // vcvt: Sd = Dm
    // vcvt between floating point and integer
    // vcvt.f64.s32 Dd, Dd, #<fbits>
    // Dd = vabs(Dm)
    // Sd = vabs(Sm)
    // Dd = vneg(Dm)
    // Sd = vneg(Sm)
    // Dd = vadd(Dn, Dm)
    // Sd = vadd(Sn, Sm)
    // Dd = vsub(Dn, Dm)
    // Sd = vsub(Sn, Sm)
    // Dd = vmul(Dn, Dm)
    // Sd = vmul(Sn, Sm)
    // Dd = vmla(Dn, Dm), Dd = vmls(Dn, Dm)
    // Sd = vmla(Sn, Sm), Sd = vmls(Sn, Sm)
    // Dd = vdiv(Dn, Dm)
    // Sd = vdiv(Sn, Sm)
    // vcmp(Dd, Dm)
    // vcmp(Sd, Sm)
    // Dd = vsqrt(Dm)
    // Sd = vsqrt(Sm)
    // Dd = vrintz(Dm)
    // Sd = vrintz(Sm)
    // vmrs
    // vmsr
    // vdup.size Qd, Rt.
    //
    // Bit 4 splits the encoding space: 0 selects the data processing
    // instructions (dispatched on Opc1/Opc2/Opc3), 1 selects the transfers
    // between ARM core registers and VFP/NEON registers or the FPSCR.
    void Simulator::DecodeTypeVFP(Instruction* instr)
    {
        DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0));
        DCHECK_EQ(instr->Bits(11, 9), 0x5);
        // Obtain single precision register codes.
        int m = instr->VFPMRegValue(kSinglePrecision);
        int d = instr->VFPDRegValue(kSinglePrecision);
        int n = instr->VFPNRegValue(kSinglePrecision);
        // Obtain double precision register codes.
        int vm = instr->VFPMRegValue(kDoublePrecision);
        int vd = instr->VFPDRegValue(kDoublePrecision);
        int vn = instr->VFPNRegValue(kDoublePrecision);

        if (instr->Bit(4) == 0) {
            if (instr->Opc1Value() == 0x7) {
                // Other data processing instructions
                if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x1)) {
                    // vmov register to register.
                    if (instr->SzValue() == 0x1) {
                        uint32_t data[2];
                        get_d_register(vm, data);
                        set_d_register(vd, data);
                    } else {
                        set_s_register(d, get_s_register(m));
                    }
                } else if ((instr->Opc2Value() == 0x0) && (instr->Opc3Value() == 0x3)) {
                    // vabs: clear the sign bit on the raw bit pattern.
                    if (instr->SzValue() == 0x1) {
                        Float64 dm = get_double_from_d_register(vm);
                        constexpr uint64_t kSignBit64 = uint64_t { 1 } << 63;
                        Float64 dd = Float64::FromBits(dm.get_bits() & ~kSignBit64);
                        dd = canonicalizeNaN(dd);
                        set_d_register_from_double(vd, dd);
                    } else {
                        Float32 sm = get_float_from_s_register(m);
                        constexpr uint32_t kSignBit32 = uint32_t { 1 } << 31;
                        Float32 sd = Float32::FromBits(sm.get_bits() & ~kSignBit32);
                        sd = canonicalizeNaN(sd);
                        set_s_register_from_float(d, sd);
                    }
                } else if ((instr->Opc2Value() == 0x1) && (instr->Opc3Value() == 0x1)) {
                    // vneg: flip the sign bit on the raw bit pattern.
                    if (instr->SzValue() == 0x1) {
                        Float64 dm = get_double_from_d_register(vm);
                        constexpr uint64_t kSignBit64 = uint64_t { 1 } << 63;
                        Float64 dd = Float64::FromBits(dm.get_bits() ^ kSignBit64);
                        dd = canonicalizeNaN(dd);
                        set_d_register_from_double(vd, dd);
                    } else {
                        Float32 sm = get_float_from_s_register(m);
                        constexpr uint32_t kSignBit32 = uint32_t { 1 } << 31;
                        Float32 sd = Float32::FromBits(sm.get_bits() ^ kSignBit32);
                        sd = canonicalizeNaN(sd);
                        set_s_register_from_float(d, sd);
                    }
                } else if ((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3)) {
                    DecodeVCVTBetweenDoubleAndSingle(instr);
                } else if ((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) {
                    DecodeVCVTBetweenFloatingPointAndInteger(instr);
                } else if ((instr->Opc2Value() == 0xA) && (instr->Opc3Value() == 0x3) && (instr->Bit(8) == 1)) {
                    // vcvt.f64.s32 Dd, Dd, #<fbits>
                    int fraction_bits = 32 - ((instr->Bits(3, 0) << 1) | instr->Bit(5));
                    int fixed_value = get_sinteger_from_s_register(vd * 2);
                    // fraction_bits ranges over 1..32, so the scale 2^fraction_bits
                    // has to be formed in 64 bits: a 32-bit `1 << 31` is negative and
                    // `1 << 32` is undefined.
                    double divide = static_cast<double>(uint64_t { 1 } << fraction_bits);
                    set_d_register_from_double(vd, fixed_value / divide);
                } else if (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1)) {
                    DecodeVCVTBetweenFloatingPointAndInteger(instr);
                } else if (((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) && (instr->Opc3Value() & 0x1)) {
                    DecodeVCMP(instr);
                } else if (((instr->Opc2Value() == 0x1)) && (instr->Opc3Value() == 0x3)) {
                    // vsqrt
                    if (instr->SzValue() == 0x1) {
                        double dm_value = get_double_from_d_register(vm).get_scalar();
                        double dd_value = std::sqrt(dm_value);
                        dd_value = canonicalizeNaN(dd_value);
                        set_d_register_from_double(vd, dd_value);
                    } else {
                        float sm_value = get_float_from_s_register(m).get_scalar();
                        float sd_value = std::sqrt(sm_value);
                        sd_value = canonicalizeNaN(sd_value);
                        set_s_register_from_float(d, sd_value);
                    }
                } else if (instr->Opc3Value() == 0x0) {
                    // vmov immediate.
                    if (instr->SzValue() == 0x1) {
                        set_d_register_from_double(vd, instr->DoubleImmedVmov());
                    } else {
                        // Cast double to float.
                        float value = instr->DoubleImmedVmov().get_scalar();
                        set_s_register_from_float(d, value);
                    }
                } else if (((instr->Opc2Value() == 0x6)) && (instr->Opc3Value() == 0x3)) {
                    // vrintz - truncate
                    if (instr->SzValue() == 0x1) {
                        double dm_value = get_double_from_d_register(vm).get_scalar();
                        double dd_value = trunc(dm_value);
                        dd_value = canonicalizeNaN(dd_value);
                        set_d_register_from_double(vd, dd_value);
                    } else {
                        float sm_value = get_float_from_s_register(m).get_scalar();
                        float sd_value = truncf(sm_value);
                        sd_value = canonicalizeNaN(sd_value);
                        set_s_register_from_float(d, sd_value);
                    }
                } else {
                    UNREACHABLE(); // Not used by V8.
                }
            } else if (instr->Opc1Value() == 0x3) {
                if (instr->Opc3Value() & 0x1) {
                    // vsub
                    if (instr->SzValue() == 0x1) {
                        double dn_value = get_double_from_d_register(vn).get_scalar();
                        double dm_value = get_double_from_d_register(vm).get_scalar();
                        double dd_value = dn_value - dm_value;
                        dd_value = canonicalizeNaN(dd_value);
                        set_d_register_from_double(vd, dd_value);
                    } else {
                        float sn_value = get_float_from_s_register(n).get_scalar();
                        float sm_value = get_float_from_s_register(m).get_scalar();
                        float sd_value = sn_value - sm_value;
                        sd_value = canonicalizeNaN(sd_value);
                        set_s_register_from_float(d, sd_value);
                    }
                } else {
                    // vadd
                    if (instr->SzValue() == 0x1) {
                        double dn_value = get_double_from_d_register(vn).get_scalar();
                        double dm_value = get_double_from_d_register(vm).get_scalar();
                        double dd_value = dn_value + dm_value;
                        dd_value = canonicalizeNaN(dd_value);
                        set_d_register_from_double(vd, dd_value);
                    } else {
                        float sn_value = get_float_from_s_register(n).get_scalar();
                        float sm_value = get_float_from_s_register(m).get_scalar();
                        float sd_value = sn_value + sm_value;
                        sd_value = canonicalizeNaN(sd_value);
                        set_s_register_from_float(d, sd_value);
                    }
                }
            } else if ((instr->Opc1Value() == 0x2) && !(instr->Opc3Value() & 0x1)) {
                // vmul
                if (instr->SzValue() == 0x1) {
                    double dn_value = get_double_from_d_register(vn).get_scalar();
                    double dm_value = get_double_from_d_register(vm).get_scalar();
                    double dd_value = dn_value * dm_value;
                    dd_value = canonicalizeNaN(dd_value);
                    set_d_register_from_double(vd, dd_value);
                } else {
                    float sn_value = get_float_from_s_register(n).get_scalar();
                    float sm_value = get_float_from_s_register(m).get_scalar();
                    float sd_value = sn_value * sm_value;
                    sd_value = canonicalizeNaN(sd_value);
                    set_s_register_from_float(d, sd_value);
                }
            } else if ((instr->Opc1Value() == 0x0)) {
                // vmla, vmls
                const bool is_vmls = (instr->Opc3Value() & 0x1);
                if (instr->SzValue() == 0x1) {
                    const double dd_val = get_double_from_d_register(vd).get_scalar();
                    const double dn_val = get_double_from_d_register(vn).get_scalar();
                    const double dm_val = get_double_from_d_register(vm).get_scalar();

                    // Note: we do the mul and add/sub in separate steps to avoid getting a
                    // result with too high precision.
                    const double res = dn_val * dm_val;
                    set_d_register_from_double(vd, res);
                    if (is_vmls) {
                        set_d_register_from_double(vd, canonicalizeNaN(dd_val - res));
                    } else {
                        set_d_register_from_double(vd, canonicalizeNaN(dd_val + res));
                    }
                } else {
                    const float sd_val = get_float_from_s_register(d).get_scalar();
                    const float sn_val = get_float_from_s_register(n).get_scalar();
                    const float sm_val = get_float_from_s_register(m).get_scalar();

                    // Note: we do the mul and add/sub in separate steps to avoid getting a
                    // result with too high precision.
                    const float res = sn_val * sm_val;
                    set_s_register_from_float(d, res);
                    if (is_vmls) {
                        set_s_register_from_float(d, canonicalizeNaN(sd_val - res));
                    } else {
                        set_s_register_from_float(d, canonicalizeNaN(sd_val + res));
                    }
                }
            } else if ((instr->Opc1Value() == 0x4) && !(instr->Opc3Value() & 0x1)) {
                // vdiv
                if (instr->SzValue() == 0x1) {
                    double dn_value = get_double_from_d_register(vn).get_scalar();
                    double dm_value = get_double_from_d_register(vm).get_scalar();
                    double dd_value = dn_value / dm_value;
                    div_zero_vfp_flag_ = (dm_value == 0);
                    dd_value = canonicalizeNaN(dd_value);
                    set_d_register_from_double(vd, dd_value);
                } else {
                    float sn_value = get_float_from_s_register(n).get_scalar();
                    float sm_value = get_float_from_s_register(m).get_scalar();
                    float sd_value = sn_value / sm_value;
                    div_zero_vfp_flag_ = (sm_value == 0);
                    sd_value = canonicalizeNaN(sd_value);
                    set_s_register_from_float(d, sd_value);
                }
            } else {
                UNIMPLEMENTED(); // Not used by V8.
            }
        } else {
            if ((instr->VCValue() == 0x0) && (instr->VAValue() == 0x0)) {
                DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(instr);
            } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x1)) {
                if (instr->Bit(23) == 0) {
                    // vmov (ARM core register to scalar)
                    int vd = instr->VFPNRegValue(kDoublePrecision);
                    int rt = instr->RtValue();
                    int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
                    if ((opc1_opc2 & 0xB) == 0) {
                        // NeonS32/NeonU32
                        uint32_t data[2];
                        get_d_register(vd, data);
                        data[instr->Bit(21)] = get_register(rt);
                        set_d_register(vd, data);
                    } else {
                        uint64_t data;
                        get_d_register(vd, &data);
                        uint64_t rt_value = get_register(rt);
                        if ((opc1_opc2 & 0x8) != 0) {
                            // NeonS8 / NeonU8
                            int i = opc1_opc2 & 0x7;
                            int shift = i * kBitsPerByte;
                            const uint64_t mask = 0xFF;
                            data &= ~(mask << shift);
                            data |= (rt_value & mask) << shift;
                            set_d_register(vd, &data);
                        } else if ((opc1_opc2 & 0x1) != 0) {
                            // NeonS16 / NeonU16
                            int i = (opc1_opc2 >> 1) & 0x3;
                            int shift = i * kBitsPerByte * kShortSize;
                            const uint64_t mask = 0xFFFF;
                            data &= ~(mask << shift);
                            data |= (rt_value & mask) << shift;
                            set_d_register(vd, &data);
                        } else {
                            UNREACHABLE(); // Not used by V8.
                        }
                    }
                } else {
                    // vdup.size Qd, Rt.
                    NeonSize size = Neon32;
                    if (instr->Bit(5) != 0)
                        size = Neon16;
                    else if (instr->Bit(22) != 0)
                        size = Neon8;
                    int vd = instr->VFPNRegValue(kSimd128Precision);
                    int rt = instr->RtValue();
                    uint32_t rt_value = get_register(rt);
                    uint32_t q_data[4];
                    switch (size) {
                    case Neon8: {
                        rt_value &= 0xFF;
                        uint8_t* dst = reinterpret_cast<uint8_t*>(q_data);
                        for (int i = 0; i < 16; i++) {
                            dst[i] = rt_value;
                        }
                        break;
                    }
                    case Neon16: {
                        // Replicate the low halfword into both halves of a word, then
                        // fill all four words with it.
                        rt_value &= 0xFFFFu;
                        uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
                        for (int i = 0; i < 4; i++) {
                            q_data[i] = rt_rt;
                        }
                        break;
                    }
                    case Neon32: {
                        for (int i = 0; i < 4; i++) {
                            q_data[i] = rt_value;
                        }
                        break;
                    }
                    default:
                        UNREACHABLE();
                        break;
                    }
                    set_neon_register(vd, q_data);
                }
            } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
                // vmov (scalar to ARM core register)
                int vn = instr->VFPNRegValue(kDoublePrecision);
                int rt = instr->RtValue();
                int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
                // The 64-bit source register is read once and shared by every lane
                // size below.
                uint64_t data;
                get_d_register(vn, &data);
                if ((opc1_opc2 & 0xB) == 0) {
                    // NeonS32 / NeonU32
                    int32_t int_data[2];
                    memcpy(int_data, &data, sizeof(int_data));
                    set_register(rt, int_data[instr->Bit(21)]);
                } else {
                    // Bit 23 selects zero extension (set) or sign extension (clear)
                    // of the extracted lane.
                    bool u = instr->Bit(23) != 0;
                    if ((opc1_opc2 & 0x8) != 0) {
                        // NeonS8 / NeonU8
                        int i = opc1_opc2 & 0x7;
                        int shift = i * kBitsPerByte;
                        uint32_t scalar = (data >> shift) & 0xFFu;
                        if (!u && (scalar & 0x80) != 0)
                            scalar |= 0xFFFFFF00;
                        set_register(rt, scalar);
                    } else if ((opc1_opc2 & 0x1) != 0) {
                        // NeonS16 / NeonU16
                        int i = (opc1_opc2 >> 1) & 0x3;
                        int shift = i * kBitsPerByte * kShortSize;
                        uint32_t scalar = (data >> shift) & 0xFFFFu;
                        if (!u && (scalar & 0x8000) != 0)
                            scalar |= 0xFFFF0000;
                        set_register(rt, scalar);
                    } else {
                        UNREACHABLE(); // Not used by V8.
                    }
                }
            } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x0) && (instr->VAValue() == 0x7) && (instr->Bits(19, 16) == 0x1)) {
                // vmrs
                uint32_t rt = instr->RtValue();
                if (rt == 0xF) {
                    Copy_FPSCR_to_APSR();
                } else {
                    // Emulate FPSCR from the Simulator flags. Each flag is widened to
                    // uint32_t before shifting so that the shift by 31 never
                    // overflows a signed int.
                    uint32_t fpscr = (static_cast<uint32_t>(n_flag_FPSCR_) << 31)
                        | (static_cast<uint32_t>(z_flag_FPSCR_) << 30)
                        | (static_cast<uint32_t>(c_flag_FPSCR_) << 29)
                        | (static_cast<uint32_t>(v_flag_FPSCR_) << 28)
                        | (static_cast<uint32_t>(FPSCR_default_NaN_mode_) << 25)
                        | (static_cast<uint32_t>(inexact_vfp_flag_) << 4)
                        | (static_cast<uint32_t>(underflow_vfp_flag_) << 3)
                        | (static_cast<uint32_t>(overflow_vfp_flag_) << 2)
                        | (static_cast<uint32_t>(div_zero_vfp_flag_) << 1)
                        | (static_cast<uint32_t>(inv_op_vfp_flag_) << 0)
                        | static_cast<uint32_t>(FPSCR_rounding_mode_);
                    set_register(rt, fpscr);
                }
            } else if ((instr->VLValue() == 0x0) && (instr->VCValue() == 0x0) && (instr->VAValue() == 0x7) && (instr->Bits(19, 16) == 0x1)) {
                // vmsr
                uint32_t rt = instr->RtValue();
                if (rt == pc) {
                    UNREACHABLE();
                } else {
                    // Unpack the same FPSCR fields that vmrs packs above.
                    uint32_t rt_value = get_register(rt);
                    n_flag_FPSCR_ = (rt_value >> 31) & 1;
                    z_flag_FPSCR_ = (rt_value >> 30) & 1;
                    c_flag_FPSCR_ = (rt_value >> 29) & 1;
                    v_flag_FPSCR_ = (rt_value >> 28) & 1;
                    FPSCR_default_NaN_mode_ = (rt_value >> 25) & 1;
                    inexact_vfp_flag_ = (rt_value >> 4) & 1;
                    underflow_vfp_flag_ = (rt_value >> 3) & 1;
                    overflow_vfp_flag_ = (rt_value >> 2) & 1;
                    div_zero_vfp_flag_ = (rt_value >> 1) & 1;
                    inv_op_vfp_flag_ = (rt_value >> 0) & 1;
                    FPSCR_rounding_mode_ = static_cast<VFPRoundingMode>((rt_value)&kVFPRoundingModeMask);
                }
            } else {
                UNIMPLEMENTED(); // Not used by V8.
            }
        }
    }

    // Decodes a coprocessor 15 instruction. Only the mcr form (bit 4 set) is
    // handled: the three CP15 memory barrier encodings are accepted and treated
    // as no-ops, other opc1 == 0 / crn == 7 encodings are unimplemented, and
    // every remaining mcr falls through without any effect.
    void Simulator::DecodeTypeCP15(Instruction* instr)
    {
        DCHECK((instr->TypeValue() == 7) && (instr->Bit(24) == 0x0));
        DCHECK_EQ(instr->CoprocessorValue(), 15);

        if (instr->Bit(4) != 1) {
            UNIMPLEMENTED();
            return;
        }

        // mcr
        const int crn = instr->Bits(19, 16);
        const int crm = instr->Bits(3, 0);
        const int opc1 = instr->Bits(23, 21);
        const int opc2 = instr->Bits(7, 5);
        if ((opc1 != 0) || (crn != 7)) {
            return;
        }

        // ARMv6 memory barrier operations.
        // Details available in ARM DDI 0406C.b, B3-1750.
        const bool is_dmb = (crm == 10) && (opc2 == 5); // CP15DMB
        const bool is_dsb = (crm == 10) && (opc2 == 4); // CP15DSB
        const bool is_isb = (crm == 5) && (opc2 == 4); // CP15ISB
        if (!(is_dmb || is_dsb || is_isb)) {
            UNIMPLEMENTED();
        }
        // The recognized barriers are ignored by the simulator for now.
    }

    // Handles vmov between an ARM core register (Rt) and a single precision
    // register (Sn). The 32-bit value is copied as an integer; the VL field
    // selects the direction (1: Sn -> Rt, otherwise Rt -> Sn).
    void Simulator::DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(
        Instruction* instr)
    {
        DCHECK((instr->Bit(4) == 1) && (instr->VCValue() == 0x0) && (instr->VAValue() == 0x0));

        const int core_reg = instr->RtValue();
        const int single_reg = instr->VFPNRegValue(kSinglePrecision);

        if (instr->VLValue() != 0x1) {
            // Rt -> Sn
            const int32_t core_value = get_register(core_reg);
            set_s_register_from_sinteger(single_reg, core_value);
            return;
        }

        // Sn -> Rt
        const int32_t single_bits = get_sinteger_from_s_register(single_reg);
        set_register(core_reg, single_bits);
    }

    // Decodes vcmp / vcmpe in single or double precision and updates the FPSCR
    // condition flags through Compute_FPSCR_Flags.
    //
    // Opc2 == 0x4 compares Vd with Vm; Opc2 == 0x5 compares Vd with zero.
    // When bit 7 is set, a NaN in either operand raises the invalid operation
    // flag.
    void Simulator::DecodeVCMP(Instruction* instr)
    {
        DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
        DCHECK(((instr->Opc2Value() == 0x4) || (instr->Opc2Value() == 0x5)) && (instr->Opc3Value() & 0x1));
        // Comparison.

        VFPRegPrecision precision = kSinglePrecision;
        if (instr->SzValue() == 0x1) {
            precision = kDoublePrecision;
        }

        const bool has_register_operand = (instr->Opc2Value() == 0x4);
        const bool raise_on_quiet_nan = (instr->Bit(7) == 1);

        int d = instr->VFPDRegValue(precision);
        int m = 0;
        if (has_register_operand) {
            m = instr->VFPMRegValue(precision);
        }

        if (precision == kDoublePrecision) {
            double dd_value = get_double_from_d_register(d).get_scalar();
            double dm_value = 0.0;
            if (has_register_operand) {
                dm_value = get_double_from_d_register(m).get_scalar();
            }

            // Raise exceptions for quiet NaNs if necessary. Both operands are
            // checked: a NaN in the second operand is just as invalid as one in
            // the first.
            if (raise_on_quiet_nan) {
                if (/*std::*/isnan(dd_value) || /*std::*/isnan(dm_value)) {
                    inv_op_vfp_flag_ = true;
                }
            }

            Compute_FPSCR_Flags(dd_value, dm_value);
        } else {
            float sd_value = get_float_from_s_register(d).get_scalar();
            float sm_value = 0.0;
            if (has_register_operand) {
                sm_value = get_float_from_s_register(m).get_scalar();
            }

            // Raise exceptions for quiet NaNs if necessary (either operand).
            if (raise_on_quiet_nan) {
                if (/*std::*/isnan(sd_value) || /*std::*/isnan(sm_value)) {
                    inv_op_vfp_flag_ = true;
                }
            }

            Compute_FPSCR_Flags(sd_value, sm_value);
        }
    }

    // Handles vcvt between single and double precision. Sz == 1 converts the
    // double in Dm to a single in Sd; otherwise the single in Sm is converted
    // to a double in Dd.
    void Simulator::DecodeVCVTBetweenDoubleAndSingle(Instruction* instr)
    {
        DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7));
        DCHECK((instr->Opc2Value() == 0x7) && (instr->Opc3Value() == 0x3));

        const bool double_to_single = (instr->SzValue() == 1);

        if (double_to_single) {
            const int single_dst = instr->VFPDRegValue(kSinglePrecision);
            const int double_src = instr->VFPMRegValue(kDoublePrecision);
            const double wide = get_double_from_d_register(double_src).get_scalar();
            set_s_register_from_float(single_dst, static_cast<float>(wide));
            return;
        }

        const int double_dst = instr->VFPDRegValue(kDoublePrecision);
        const int single_src = instr->VFPMRegValue(kSinglePrecision);
        const float narrow = get_float_from_s_register(single_src).get_scalar();
        set_d_register_from_double(double_dst, static_cast<double>(narrow));
    }

    // Returns true when converting |val| to a 32-bit integer (unsigned if
    // |unsigned_| is set, signed otherwise) under rounding mode |mode| is an
    // invalid operation, i.e. |val| is NaN or the rounded result would fall
    // outside the destination range.
    bool get_inv_op_vfp_flag(VFPRoundingMode mode,
        double val,
        bool unsigned_)
    {
        DCHECK((mode == RN) || (mode == RM) || (mode == RZ));

        // NaN is the only value that compares unequal to itself.
        if (val != val) {
            return true;
        }

        // Inclusive bounds of the destination type. 32bit integers can be
        // exactly represented by ieee-754 64bit floating-point values, so the
        // comparisons below are exact.
        const double upper = unsigned_ ? static_cast<double>(0xFFFFFFFFu)
                                       : static_cast<double>(kMaxInt);
        const double lower = unsigned_ ? 0.0 : static_cast<double>(kMinInt);

        switch (mode) {
        case RN:
            // Round to nearest: half a unit beyond either bound still rounds
            // back into range (the upper halfway point itself does not).
            return (val >= (upper + 0.5)) || (val < (lower - 0.5));

        case RM:
            // Round toward minus infinity: anything below the lower bound
            // rounds further down.
            return (val >= (upper + 1.0)) || (val < lower);

        case RZ:
            // Round toward zero: values strictly within one unit of a bound
            // truncate back into range.
            return (val >= (upper + 1.0)) || (val <= (lower - 1.0));

        default:
            UNREACHABLE();
        }
    }

    // Only called after a vfp invalid operation exception was detected.
    // Returns the saturated conversion result: 0 for NaN, otherwise the lowest
    // or highest value of the destination type depending on the sign of |val|.
    int VFPConversionSaturate(double val, bool unsigned_res)
    {
        if (val != val) {
            return 0;
        }

        const bool is_negative = (val < 0);
        if (unsigned_res) {
            return is_negative ? 0 : 0xFFFFFFFFu;
        }
        return is_negative ? kMinInt : kMaxInt;
    }

    int32_t Simulator::ConvertDoubleToInt(double val, bool unsigned_integer,
        VFPRoundingMode mode)
    {
        // TODO(jkummerow): These casts are undefined behavior if the integral
        // part of {val} does not fit into the destination type.
        int32_t result = unsigned_integer ? static_cast<uint32_t>(val) : static_cast<int32_t>(val);

        inv_op_vfp_flag_ = get_inv_op_vfp_flag(mode, val, unsigned_integer);

        double abs_diff = unsigned_integer
            ? std::fabs(val - static_cast<uint32_t>(result))
            : std::fabs(val - result);

        inexact_vfp_flag_ = (abs_diff != 0);

        if (inv_op_vfp_flag_) {
            result = VFPConversionSaturate(val, unsigned_integer);
        } else {
            switch (mode) {
            case RN: {
                int val_sign = (val > 0) ? 1 : -1;
                if (abs_diff > 0.5) {
                    result += val_sign;
                } else if (abs_diff == 0.5) {
                    // Round to even if exactly halfway.
                    result = ((result % 2) == 0) ? result : result + val_sign;
                }
                break;
            }

            case RM:
                result = result > val ? result - 1 : result;
                break;

            case RZ:
                // Nothing to do.
                break;

            default:
                UNREACHABLE();
            }
        }
        return result;
    }

    // Handles vcvt between a floating point value (single or double, chosen by
    // Sz) and a 32-bit integer held in a single precision register. Bit 18
    // selects the direction: 1 converts floating point to integer, 0 converts
    // integer to floating point.
    void Simulator::DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr)
    {
        DCHECK((instr->Bit(4) == 0) && (instr->Opc1Value() == 0x7) && (instr->Bits(27, 23) == 0x1D));
        DCHECK(((instr->Opc2Value() == 0x8) && (instr->Opc3Value() & 0x1)) || (((instr->Opc2Value() >> 1) == 0x6) && (instr->Opc3Value() & 0x1)));

        const bool fp_is_double = (instr->SzValue() == 1);
        const VFPRegPrecision fp_precision = fp_is_double ? kDoublePrecision
                                                          : kSinglePrecision;

        if (instr->Bit(18) != 1) {
            // Integer to floating point. Bit 7 clear means the source integer is
            // interpreted as unsigned.
            const bool treat_as_unsigned = (instr->Bit(7) == 0);

            const int fp_dst = instr->VFPDRegValue(fp_precision);
            const int int_src = instr->VFPMRegValue(kSinglePrecision);

            const int raw = get_sinteger_from_s_register(int_src);

            if (fp_is_double) {
                const double converted = treat_as_unsigned
                    ? static_cast<double>(static_cast<uint32_t>(raw))
                    : static_cast<double>(raw);
                set_d_register_from_double(fp_dst, converted);
            } else {
                const float converted = treat_as_unsigned
                    ? static_cast<float>(static_cast<uint32_t>(raw))
                    : static_cast<float>(raw);
                set_s_register_from_float(fp_dst, converted);
            }
            return;
        }

        // Floating point to integer.
        //
        // We are playing with code close to the C++ standard's limits below,
        // hence the very simple code and heavy checks.
        //
        // Note:
        // C++ defines default type casting from floating point to integer as
        // (close to) rounding toward zero ("fractional part discarded").
        const int int_dst = instr->VFPDRegValue(kSinglePrecision);
        const int fp_src = instr->VFPMRegValue(fp_precision);

        // Bit 7 in vcvt instructions indicates if we should use the FPSCR rounding
        // mode or the default Round to Zero mode.
        VFPRoundingMode mode = RZ;
        if (instr->Bit(7) != 1) {
            mode = FPSCR_rounding_mode_;
        }
        DCHECK((mode == RM) || (mode == RZ) || (mode == RN));

        // Bit 16 clear selects an unsigned destination.
        const bool treat_as_unsigned = (instr->Bit(16) == 0);

        double val;
        if (fp_is_double) {
            val = get_double_from_d_register(fp_src).get_scalar();
        } else {
            val = get_float_from_s_register(fp_src).get_scalar();
        }

        // Update the destination register.
        const int32_t converted = ConvertDoubleToInt(val, treat_as_unsigned, mode);
        set_s_register_from_sinteger(int_dst, converted);
    }

    // void Simulator::DecodeType6CoprocessorIns(Instruction* instr)
    // Decode Type 6 coprocessor instructions.
    // Dm = vmov(Rt, Rt2)
    // <Rt, Rt2> = vmov(Dm)
    // Ddst = MEM(Rbase + 4*offset).
    // MEM(Rbase + 4*offset) = Dsrc.
    void Simulator::DecodeType6CoprocessorIns(Instruction* instr)
    {
        DCHECK_EQ(instr->TypeValue(), 6);

        // Dispatch on the coprocessor number first (0xA handles single precision
        // transfers, 0xB handles double precision ones), then on the opcode.
        switch (instr->CoprocessorValue()) {
        case 0xA:
            switch (instr->OpcodeValue()) {
            case 0x8:
            case 0xA:
            case 0xC:
            case 0xE: { // Load and store single precision float to memory.
                const int base_reg = instr->RnValue();
                const int s_reg = instr->VFPDRegValue(kSinglePrecision);
                const int imm8 = instr->Immed8Value();
                // The U bit decides whether the immediate is added or subtracted.
                const int word_offset = instr->HasU() ? imm8 : -imm8;

                const int32_t address = get_register(base_reg) + 4 * word_offset;
                // Load and store address for singles must be at least four-byte
                // aligned.
                DCHECK_EQ(address % 4, 0);
                if (!instr->HasL()) {
                    // Store single to memory: vstr.
                    WriteW(address, get_sinteger_from_s_register(s_reg));
                } else {
                    // Load single from memory: vldr.
                    set_s_register_from_sinteger(s_reg, ReadW(address));
                }
                break;
            }
            case 0x4:
            case 0x5:
            case 0x6:
            case 0x7:
            case 0x9:
            case 0xB:
                // Load/store multiple single from memory: vldm/vstm.
                HandleVList(instr);
                break;
            default:
                UNIMPLEMENTED(); // Not used by V8.
            }
            break;
        case 0xB:
            switch (instr->OpcodeValue()) {
            case 0x2:
                // Move a double between a D register and two GP registers.
                if (instr->Bits(7, 6) == 0 && instr->Bit(4) == 1) {
                    const int low_reg = instr->RtValue();
                    const int high_reg = instr->RnValue();
                    const int d_reg = instr->VFPMRegValue(kDoublePrecision);
                    if (!instr->HasL()) {
                        // GP register pair -> D register.
                        int32_t words[] = { get_register(low_reg), get_register(high_reg) };
                        set_d_register(d_reg, reinterpret_cast<uint32_t*>(words));
                    } else {
                        // D register -> GP register pair.
                        uint32_t words[2];
                        get_d_register(d_reg, words);
                        set_register(low_reg, words[0]);
                        set_register(high_reg, words[1]);
                    }
                } else {
                    UNIMPLEMENTED(); // Not used by V8.
                }
                break;
            case 0x8:
            case 0xA:
            case 0xC:
            case 0xE: { // Load and store double to memory.
                const int base_reg = instr->RnValue();
                const int d_reg = instr->VFPDRegValue(kDoublePrecision);
                const int imm8 = instr->Immed8Value();
                // The U bit decides whether the immediate is added or subtracted.
                const int word_offset = instr->HasU() ? imm8 : -imm8;
                const int32_t address = get_register(base_reg) + 4 * word_offset;
                // Load and store address for doubles must be at least four-byte
                // aligned.
                DCHECK_EQ(address % 4, 0);
                if (!instr->HasL()) {
                    // Store double to memory: vstr (low word first).
                    uint32_t words[2];
                    get_d_register(d_reg, words);
                    WriteW(address, words[0]);
                    WriteW(address + 4, words[1]);
                } else {
                    // Load double from memory: vldr (low word first).
                    const int32_t low_word = ReadW(address);
                    const int32_t high_word = ReadW(address + 4);
                    int32_t words[] = { low_word, high_word };
                    set_d_register(d_reg, reinterpret_cast<uint32_t*>(words));
                }
                break;
            }
            case 0x4:
            case 0x5:
            case 0x6:
            case 0x7:
            case 0x9:
            case 0xB:
                // Load/store multiple double from memory: vldm/vstm.
                HandleVList(instr);
                break;
            default:
                UNIMPLEMENTED(); // Not used by V8.
            }
            break;
        default:
            UNIMPLEMENTED(); // Not used by V8.
        }
    }

    // Templated operations for NEON instructions.
    template <typename T, typename U>
    U Widen(T value)
    {
        // Value-preserving conversion of |value| to the strictly wider type U.
        // The source type must be narrower than 64 bits.
        static_assert(sizeof(T) < sizeof(int64_t), "T must be int32_t or smaller");
        static_assert(sizeof(T) < sizeof(U), "T must smaller than U");
        const U widened = static_cast<U>(value);
        return widened;
    }

    // Converts |value| to the strictly narrower type U, which must have the same
    // signedness as T. The caller is responsible for making sure |value| is
    // representable in U (e.g. by clamping it first); this is checked in debug
    // builds.
    template <typename T, typename U>
    U Narrow(T value)
    {
        static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
        static_assert(sizeof(U) < sizeof(T), "T must larger than U");
        static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(),
            "Signed-ness of T and U must match");
        // Make sure value can be expressed in the smaller type; otherwise, the
        // casted result is implementation defined. The bounds are those of the
        // destination type U (widened to T for the comparison); comparing against
        // the limits of T itself would be trivially true.
        DCHECK_LE(static_cast<T>(std::numeric_limits<U>::min()), value);
        DCHECK_GE(static_cast<T>(std::numeric_limits<U>::max()), value);
        return static_cast<U>(value);
    }

    // Saturates the 64-bit |value| to the representable range of T and returns
    // the result as a T. T must be narrower than 64 bits.
    template <typename T>
    T Clamp(int64_t value)
    {
        static_assert(sizeof(T) < sizeof(int64_t), "T must be int32_t or smaller");
        const int64_t lowest = static_cast<int64_t>(std::numeric_limits<T>::min());
        const int64_t highest = static_cast<int64_t>(std::numeric_limits<T>::max());
        if (value < lowest) {
            return static_cast<T>(lowest);
        }
        if (value > highest) {
            return static_cast<T>(highest);
        }
        return static_cast<T>(value);
    }

    // Reads the 8-byte register Vm as lanes of type T, widens every lane to U and
    // writes the widened lanes to register Vd.
    template <typename T, typename U>
    void Widen(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = 8 / sizeof(T);
        T narrow_lanes[kLaneCount];
        U wide_lanes[kLaneCount];
        simulator->get_neon_register<T, kDoubleSize>(Vm, narrow_lanes);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            wide_lanes[lane] = Widen<T, U>(narrow_lanes[lane]);
        }
        simulator->set_neon_register(Vd, wide_lanes);
    }

    // Lane-wise absolute value: reads SIZE bytes of register Vm as lanes of type
    // T, applies std::abs to each lane and writes the result to register Vd.
    template <typename T, int SIZE>
    void Abs(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lanes[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, lanes);
        for (T& lane : lanes) {
            lane = std::abs(lane);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes);
    }

    // Lane-wise negation: reads SIZE bytes of register Vm as lanes of type T,
    // negates each lane and writes the result to register Vd.
    template <typename T, int SIZE>
    void Neg(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lanes[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, lanes);
        for (T& lane : lanes) {
            lane = -lane;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes);
    }

    // Reads register Vm as 16 bytes of T lanes, saturates every lane to the range
    // of the narrower type U and writes the narrowed lanes to the 8-byte
    // register Vd.
    template <typename T, typename U>
    void SaturatingNarrow(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = 16 / sizeof(T);
        T wide_lanes[kLaneCount];
        U narrow_lanes[kLaneCount];
        simulator->get_neon_register(Vm, wide_lanes);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            // Clamp first so that the narrowing conversion cannot lose bits.
            const U saturated = Clamp<U>(wide_lanes[lane]);
            narrow_lanes[lane] = Narrow<T, U>(saturated);
        }
        simulator->set_neon_register<U, kDoubleSize>(Vd, narrow_lanes);
    }

    // Lane-wise saturating addition of registers Vn and Vm (16 bytes of T lanes
    // each); the result is written to register Vd.
    template <typename T>
    void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = 16 / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register(Vn, lhs);
        simulator->get_neon_register(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            // Compute the exact sum in 64 bits, then saturate it back into T.
            const int64_t a = Widen<T, int64_t>(lhs[lane]);
            const int64_t b = Widen<T, int64_t>(rhs[lane]);
            lhs[lane] = Clamp<T>(a + b);
        }
        simulator->set_neon_register(Vd, lhs);
    }

    // Lane-wise saturating subtraction Vn - Vm (16 bytes of T lanes each); the
    // result is written to register Vd.
    template <typename T>
    void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = 16 / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register(Vn, lhs);
        simulator->get_neon_register(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            // Compute the exact difference in 64 bits, then saturate it back into T.
            const int64_t minuend = Widen<T, int64_t>(lhs[lane]);
            const int64_t subtrahend = Widen<T, int64_t>(rhs[lane]);
            lhs[lane] = Clamp<T>(minuend - subtrahend);
        }
        simulator->set_neon_register(Vd, lhs);
    }

    // Interleaves the lanes of registers Vd and Vm. The lower halves of both
    // inputs are interleaved into Vd (Vd lanes at even positions, Vm lanes at odd
    // positions) and the upper halves are interleaved the same way into Vm.
    template <typename T, int SIZE>
    void Zip(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        static const int kHalf = kLaneCount / 2;
        T in_d[kLaneCount], in_m[kLaneCount];
        T out_d[kLaneCount], out_m[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vd, in_d);
        simulator->get_neon_register<T, SIZE>(Vm, in_m);
        for (int pair = 0; pair < kHalf; ++pair) {
            const int even = 2 * pair;
            const int odd = even + 1;
            out_d[even] = in_d[pair];
            out_d[odd] = in_m[pair];
            out_m[even] = in_d[pair + kHalf];
            out_m[odd] = in_m[pair + kHalf];
        }
        simulator->set_neon_register<T, SIZE>(Vd, out_d);
        simulator->set_neon_register<T, SIZE>(Vm, out_m);
    }

    // De-interleaves the lanes of registers Vd and Vm. The even-indexed lanes of
    // Vd followed by the even-indexed lanes of Vm are written to Vd; the
    // odd-indexed lanes of both, in the same order, are written to Vm.
    template <typename T, int SIZE>
    void Unzip(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        static const int kHalf = kLaneCount / 2;
        T in_d[kLaneCount], in_m[kLaneCount];
        T out_d[kLaneCount], out_m[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vd, in_d);
        simulator->get_neon_register<T, SIZE>(Vm, in_m);
        for (int pair = 0; pair < kHalf; ++pair) {
            const int even = 2 * pair;
            const int odd = even + 1;
            out_d[pair] = in_d[even];
            out_d[pair + kHalf] = in_m[even];
            out_m[pair] = in_d[odd];
            out_m[pair + kHalf] = in_m[odd];
        }
        simulator->set_neon_register<T, SIZE>(Vd, out_d);
        simulator->set_neon_register<T, SIZE>(Vm, out_m);
    }

    // Treats each pair of adjacent lanes of Vd and Vm as a 2x2 matrix and
    // transposes it: every odd lane of Vd is exchanged with the even lane of Vm
    // that precedes it. Both registers are written back.
    template <typename T, int SIZE>
    void Transpose(Simulator* simulator, int Vd, int Vm)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        static const int kHalf = kLaneCount / 2;
        T lanes_d[kLaneCount], lanes_m[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vd, lanes_d);
        simulator->get_neon_register<T, SIZE>(Vm, lanes_m);
        for (int even = 0; even < 2 * kHalf; even += 2) {
            const int odd = even + 1;
            const T saved = lanes_d[odd];
            lanes_d[odd] = lanes_m[even];
            lanes_m[even] = saved;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes_d);
        simulator->set_neon_register<T, SIZE>(Vm, lanes_m);
    }

    // Lane-wise bit test: a lane of Vd is set to -1 (converted to T) when the
    // corresponding lanes of Vn and Vm have at least one set bit in common, and
    // to 0 otherwise.
    template <typename T, int SIZE>
    void Test(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            if ((lhs[lane] & rhs[lane]) != 0) {
                lhs[lane] = -1;
            } else {
                lhs[lane] = 0;
            }
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Lane-wise addition Vn + Vm over SIZE bytes of T lanes; the result, converted
    // back to T, is written to register Vd.
    template <typename T, int SIZE>
    void Add(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            lhs[lane] = static_cast<T>(lhs[lane] + rhs[lane]);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Lane-wise subtraction Vn - Vm over SIZE bytes of T lanes; the result,
    // converted back to T, is written to register Vd.
    template <typename T, int SIZE>
    void Sub(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            lhs[lane] = static_cast<T>(lhs[lane] - rhs[lane]);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Lane-wise multiplication Vn * Vm over SIZE bytes of T lanes; the result,
    // converted back to T, is written to register Vd.
    template <typename T, int SIZE>
    void Mul(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            lhs[lane] = static_cast<T>(lhs[lane] * rhs[lane]);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Shifts every T lane of register Vm left by |shift| bits and writes the
    // result, converted back to T, to register Vd.
    template <typename T, int SIZE>
    void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lanes[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, lanes);
        for (T& lane : lanes) {
            lane = static_cast<T>(lane << shift);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes);
    }

    // Shifts every T lane of register Vm right by |shift| bits using the built-in
    // >> operator of T and writes the result to register Vd.
    template <typename T, int SIZE>
    void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lanes[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, lanes);
        for (T& lane : lanes) {
            lane = static_cast<T>(lane >> shift);
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes);
    }

    // Applies the scalar ArithmeticShiftRight helper with the given |shift| to
    // every T lane of register Vm and writes the result to register Vd.
    template <typename T, int SIZE>
    void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lanes[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, lanes);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            const T shifted = ArithmeticShiftRight(lanes[lane], shift);
            lanes[lane] = shifted;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lanes);
    }

    // Shifts every T lane of register Vm left by |shift| bits and inserts it into
    // the corresponding lane of Vd, keeping the low |shift| bits that the
    // destination lane had before.
    template <typename T, int SIZE>
    void ShiftLeftAndInsert(Simulator* simulator, int Vd, int Vm, int shift)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T source[kLaneCount];
        T target[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vm, source);
        simulator->get_neon_register<T, SIZE>(Vd, target);
        // Selects the low |shift| bits, i.e. the destination bits that survive.
        const uint64_t keep_mask = (1llu << shift) - 1llu;
        for (int lane = 0; lane < kLaneCount; ++lane) {
            const auto inserted = source[lane] << shift;
            const auto preserved = target[lane] & keep_mask;
            target[lane] = inserted | preserved;
        }
        simulator->set_neon_register<T, SIZE>(Vd, target);
    }

    // Shifts every T lane of register Vm right by |shift| bits and inserts it
    // into the corresponding lane of Vd, keeping the top |shift| bits that the
    // destination lane had before.
    // NOTE(review): assumes 0 < shift < lane width in bits - confirm against
    // the callers.
    template <typename T, int SIZE>
    void ShiftRightAndInsert(Simulator* simulator, int Vd, int Vm, int shift)
    {
        static const int kElems = SIZE / sizeof(T);
        // Width of a single lane in bits. The mask has to be derived from the lane
        // width rather than from the register size SIZE: a register wider than
        // one lane would otherwise yield a shift count of 64 or more on a 64-bit
        // value, which is undefined behavior, and a mask that does not cover the
        // top bits of the lane.
        static const int kLaneBits = kBitsPerByte * static_cast<int>(sizeof(T));
        T src[kElems];
        T dst[kElems];
        simulator->get_neon_register<T, SIZE>(Vm, src);
        simulator->get_neon_register<T, SIZE>(Vd, dst);
        // Selects the top |shift| bits of a lane, i.e. the destination bits that
        // survive the insertion.
        const uint64_t mask = ~((1llu << (kLaneBits - shift)) - 1llu);
        for (int i = 0; i < kElems; i++) {
            dst[i] = (src[i] >> shift) | (dst[i] & mask);
        }
        simulator->set_neon_register<T, SIZE>(Vd, dst);
    }

    // Lane-wise equality comparison of Vn and Vm: a lane of Vd is set to -1
    // (converted to T) when the two source lanes are equal and to 0 otherwise.
    template <typename T, int SIZE>
    void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            const bool equal = lhs[lane] == rhs[lane];
            lhs[lane] = equal ? -1 : 0;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Lane-wise ordered comparison of Vn against Vm: a lane of Vd is set to -1
    // (converted to T) when the Vn lane is greater than the Vm lane - or greater
    // than or equal to it when |ge| is true - and to 0 otherwise.
    template <typename T, int SIZE>
    void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            const bool holds = ge ? (lhs[lane] >= rhs[lane]) : (lhs[lane] > rhs[lane]);
            lhs[lane] = holds ? -1 : 0;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Float overload: delegates to JSMin when |is_min| is set and to JSMax
    // otherwise.
    float MinMax(float a, float b, bool is_min)
    {
        if (is_min) {
            return JSMin(a, b);
        }
        return JSMax(a, b);
    }
    // Generic overload: returns std::min(a, b) when |is_min| is set and
    // std::max(a, b) otherwise.
    template <typename T>
    T MinMax(T a, T b, bool is_min)
    {
        if (is_min) {
            return std::min(a, b);
        }
        return std::max(a, b);
    }

    // Lane-wise minimum (when |min| is true) or maximum of registers Vn and Vm
    // over SIZE bytes of T lanes; the result is written to register Vd.
    template <typename T, int SIZE>
    void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min)
    {
        static const int kLaneCount = SIZE / sizeof(T);
        T lhs[kLaneCount], rhs[kLaneCount];
        simulator->get_neon_register<T, SIZE>(Vn, lhs);
        simulator->get_neon_register<T, SIZE>(Vm, rhs);
        for (int lane = 0; lane < kLaneCount; ++lane) {
            const T selected = MinMax(lhs[lane], rhs[lane], min);
            lhs[lane] = selected;
        }
        simulator->set_neon_register<T, SIZE>(Vd, lhs);
    }

    // Pairwise minimum (when |min| is true) or maximum over 8-byte registers.
    // Adjacent lane pairs of Vn are reduced into the lower half of Vd and
    // adjacent lane pairs of Vm into the upper half.
    template <typename T>
    void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min)
    {
        static const int kLaneCount = kDoubleSize / sizeof(T);
        static const int kHalf = kLaneCount / 2;
        T first[kLaneCount], second[kLaneCount], result[kLaneCount];
        simulator->get_neon_register<T, kDoubleSize>(Vn, first);
        simulator->get_neon_register<T, kDoubleSize>(Vm, second);
        for (int pair = 0; pair < kHalf; ++pair) {
            const int even = pair * 2;
            const int odd = even + 1;
            result[pair] = MinMax(first[even], first[odd], min);
            result[pair + kHalf] = MinMax(second[even], second[odd], min);
        }
        simulator->set_neon_register<T, kDoubleSize>(Vd, result);
    }

    // Pairwise addition over 8-byte registers. The sums of adjacent lane pairs of
    // Vn fill the lower half of Vd and the sums of adjacent lane pairs of Vm fill
    // the upper half.
    template <typename T>
    void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn)
    {
        static const int kLaneCount = kDoubleSize / sizeof(T);
        static const int kHalf = kLaneCount / 2;
        T first[kLaneCount], second[kLaneCount], result[kLaneCount];
        simulator->get_neon_register<T, kDoubleSize>(Vn, first);
        simulator->get_neon_register<T, kDoubleSize>(Vm, second);
        for (int pair = 0; pair < kHalf; ++pair) {
            const int even = pair * 2;
            const int odd = even + 1;
            result[pair] = first[even] + first[odd];
            result[pair + kHalf] = second[even] + second[odd];
        }
        simulator->set_neon_register<T, kDoubleSize>(Vd, result);
    }

    void Simulator::DecodeSpecialCondition(Instruction* instr)
    {
        switch (instr->SpecialValue()) {
        case 4: {
            int Vd, Vm, Vn;
            if (instr->Bit(6) == 0) {
                Vd = instr->VFPDRegValue(kDoublePrecision);
                Vm = instr->VFPMRegValue(kDoublePrecision);
                Vn = instr->VFPNRegValue(kDoublePrecision);
            } else {
                Vd = instr->VFPDRegValue(kSimd128Precision);
                Vm = instr->VFPMRegValue(kSimd128Precision);
                Vn = instr->VFPNRegValue(kSimd128Precision);
            }
            switch (instr->Bits(11, 8)) {
            case 0x0: {
                if (instr->Bit(4) == 1) {
                    // vqadd.s<size> Qd, Qm, Qn.
                    NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                    switch (size) {
                    case Neon8:
                        AddSaturate<int8_t>(this, Vd, Vm, Vn);
                        break;
                    case Neon16:
                        AddSaturate<int16_t>(this, Vd, Vm, Vn);
                        break;
                    case Neon32:
                        AddSaturate<int32_t>(this, Vd, Vm, Vn);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0x1: {
                if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
                    // vmov Qd, Qm.
                    // vorr, Qd, Qm, Qn.
                    uint32_t src1[4];
                    get_neon_register(Vm, src1);
                    if (Vm != Vn) {
                        uint32_t src2[4];
                        get_neon_register(Vn, src2);
                        for (int i = 0; i < 4; i++) {
                            src1[i] = src1[i] | src2[i];
                        }
                    }
                    set_neon_register(Vd, src1);
                } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
                    // vand Qd, Qm, Qn.
                    uint32_t src1[4], src2[4];
                    get_neon_register(Vn, src1);
                    get_neon_register(Vm, src2);
                    for (int i = 0; i < 4; i++) {
                        src1[i] = src1[i] & src2[i];
                    }
                    set_neon_register(Vd, src1);
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0x2: {
                if (instr->Bit(4) == 1) {
                    // vqsub.s<size> Qd, Qm, Qn.
                    NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                    switch (size) {
                    case Neon8:
                        SubSaturate<int8_t>(this, Vd, Vm, Vn);
                        break;
                    case Neon16:
                        SubSaturate<int16_t>(this, Vd, Vm, Vn);
                        break;
                    case Neon32:
                        SubSaturate<int32_t>(this, Vd, Vm, Vn);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0x3: {
                // vcge/vcgt.s<size> Qd, Qm, Qn.
                bool ge = instr->Bit(4) == 1;
                NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                switch (size) {
                case Neon8:
                    CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
                    break;
                case Neon16:
                    CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
                    break;
                case Neon32:
                    CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
                break;
            }
            case 0x6: {
                // vmin/vmax.s<size> Qd, Qm, Qn.
                NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                bool min = instr->Bit(4) != 0;
                switch (size) {
                case Neon8:
                    MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
                    break;
                case Neon16:
                    MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
                    break;
                case Neon32:
                    MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
                break;
            }
            case 0x8: {
                // vadd/vtst
                NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (instr->Bit(4) == 0) {
                    // vadd.i<size> Qd, Qm, Qn.
                    switch (size) {
                    case Neon8:
                        Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon16:
                        Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon32:
                        Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                } else {
                    // vtst.i<size> Qd, Qm, Qn.
                    switch (size) {
                    case Neon8:
                        Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon16:
                        Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon32:
                        Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                }
                break;
            }
            case 0x9: {
                if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
                    // vmul.i<size> Qd, Qm, Qn.
                    NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                    switch (size) {
                    case Neon8:
                        Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon16:
                        Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    case Neon32:
                        Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
                        break;
                    default:
                        UNREACHABLE();
                        break;
                    }
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0xA: {
                // vpmin/vpmax.s<size> Dd, Dm, Dn.
                NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                bool min = instr->Bit(4) != 0;
                switch (size) {
                case Neon8:
                    PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min);
                    break;
                case Neon16:
                    PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min);
                    break;
                case Neon32:
                    PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
                break;
            }
            case 0xB: {
                // vpadd.i<size> Dd, Dm, Dn.
                NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
                switch (size) {
                case Neon8:
                    PairwiseAdd<int8_t>(this, Vd, Vm, Vn);
                    break;
                case Neon16:
                    PairwiseAdd<int16_t>(this, Vd, Vm, Vn);
                    break;
                case Neon32:
                    PairwiseAdd<int32_t>(this, Vd, Vm, Vn);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
                break;
            }
            case 0xD: {
                if (instr->Bit(4) == 0) {
                    float src1[4], src2[4];
                    get_neon_register(Vn, src1);
                    get_neon_register(Vm, src2);
                    for (int i = 0; i < 4; i++) {
                        if (instr->Bit(21) == 0) {
                            // vadd.f32 Qd, Qm, Qn.
                            src1[i] = src1[i] + src2[i];
                        } else {
                            // vsub.f32 Qd, Qm, Qn.
                            src1[i] = src1[i] - src2[i];
                        }
                    }
                    set_neon_register(Vd, src1);
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0xE: {
                if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
                    // vceq.f32.
                    float src1[4], src2[4];
                    get_neon_register(Vn, src1);
                    get_neon_register(Vm, src2);
                    uint32_t dst[4];
                    for (int i = 0; i < 4; i++) {
                        dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
                    }
                    set_neon_register(Vd, dst);
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0xF: {
                if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
                    float src1[4], src2[4];
                    get_neon_register(Vn, src1);
                    get_neon_register(Vm, src2);
                    if (instr->Bit(4) == 1) {
                        if (instr->Bit(21) == 0) {
                            // vrecps.f32 Qd, Qm, Qn.
                            for (int i = 0; i < 4; i++) {
                                src1[i] = 2.0f - src1[i] * src2[i];
                            }
                        } else {
                            // vrsqrts.f32 Qd, Qm, Qn.
                            for (int i = 0; i < 4; i++) {
                                src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
                            }
                        }
                    } else {
                        // vmin/vmax.f32 Qd, Qm, Qn.
                        bool min = instr->Bit(21) == 1;
                        for (int i = 0; i < 4; i++) {
                            src1[i] = MinMax(src1[i], src2[i], min);
                        }
                    }
                    set_neon_register(Vd, src1);
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            default:
                UNIMPLEMENTED();
                break;
            }
            break;
        }
        case 5:
            if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && (instr->Bit(4) == 1)) {
                // vmovl signed
                if ((instr->VdValue() & 1) != 0)
                    UNIMPLEMENTED();
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kDoublePrecision);
                int imm3 = instr->Bits(21, 19);
                switch (imm3) {
                case 1:
                    Widen<int8_t, int16_t>(this, Vd, Vm);
                    break;
                case 2:
                    Widen<int16_t, int32_t>(this, Vd, Vm);
                    break;
                case 4:
                    Widen<int32_t, int64_t>(this, Vd, Vm);
                    break;
                default:
                    UNIMPLEMENTED();
                    break;
                }
            } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
                // vext.
                int imm4 = instr->Bits(11, 8);
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kSimd128Precision);
                int Vn = instr->VFPNRegValue(kSimd128Precision);
                uint8_t src1[16], src2[16], dst[16];
                get_neon_register(Vn, src1);
                get_neon_register(Vm, src2);
                int boundary = kSimd128Size - imm4;
                int i = 0;
                for (; i < boundary; i++) {
                    dst[i] = src1[i + imm4];
                }
                for (; i < 16; i++) {
                    dst[i] = src2[i - boundary];
                }
                set_neon_register(Vd, dst);
            } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) {
                // vshl.i<size> Qd, Qm, shift
                int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
                int shift = instr->Bits(21, 16) - size;
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kSimd128Precision);
                NeonSize ns = static_cast<NeonSize>(size / 16);
                switch (ns) {
                case Neon8:
                    ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon16:
                    ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon32:
                    ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
            } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
                // vshr.s<size> Qd, Qm, shift
                int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
                int shift = 2 * size - instr->Bits(21, 16);
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kSimd128Precision);
                NeonSize ns = static_cast<NeonSize>(size / 16);
                switch (ns) {
                case Neon8:
                    ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon16:
                    ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon32:
                    ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
            } else {
                UNIMPLEMENTED();
            }
            break;
        case 6: {
            int Vd, Vm, Vn;
            if (instr->Bit(6) == 0) {
                Vd = instr->VFPDRegValue(kDoublePrecision);
                Vm = instr->VFPMRegValue(kDoublePrecision);
                Vn = instr->VFPNRegValue(kDoublePrecision);
            } else {
                Vd = instr->VFPDRegValue(kSimd128Precision);
                Vm = instr->VFPMRegValue(kSimd128Precision);
                Vn = instr->VFPNRegValue(kSimd128Precision);
            }
            switch (instr->Bits(11, 8)) {
            case 0x0: {
                // Only the encoding with bit 4 set is simulated here.
                if (instr->Bit(4) != 1) {
                    UNIMPLEMENTED();
                    break;
                }
                // vqadd.u<size> Qd, Qm, Qn.
                // Bits 21:20 select the unsigned lane type handed to AddSaturate.
                // NOTE(review): Vd/Vm/Vn are decoded as D indices when bit 6 is clear,
                // while this comment and the helper call mention only Q registers;
                // confirm the D-register form is handled as intended.
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    AddSaturate<uint8_t>(this, Vd, Vm, Vn);
                } else if (lane_size == Neon16) {
                    AddSaturate<uint16_t>(this, Vd, Vm, Vn);
                } else if (lane_size == Neon32) {
                    AddSaturate<uint32_t>(this, Vd, Vm, Vn);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0x1: {
                // Both instructions handled here require bit 4 set; bits 21:20 pick
                // between them.
                const bool bit4_set = instr->Bit(4) == 1;
                const int variant = instr->Bits(21, 20);
                if (bit4_set && variant == 1) {
                    // vbsl.size Qd, Qm, Qn.
                    // The old contents of Qd act as a per-bit selector: a set bit takes
                    // the bit from Qn, a clear bit takes it from Qm.
                    uint32_t selector[4], when_set[4], when_clear[4];
                    get_neon_register(Vd, selector);
                    get_neon_register(Vn, when_set);
                    get_neon_register(Vm, when_clear);
                    for (int lane = 0; lane < 4; lane++) {
                        const uint32_t sel = selector[lane];
                        selector[lane] = (sel & when_set[lane]) | (~sel & when_clear[lane]);
                    }
                    set_neon_register(Vd, selector);
                } else if (bit4_set && variant == 0) {
                    if (instr->Bit(6) != 0) {
                        // veor Qd, Qn, Qm
                        uint32_t lhs[4], rhs[4];
                        get_neon_register(Vn, lhs);
                        get_neon_register(Vm, rhs);
                        for (int lane = 0; lane < 4; lane++) {
                            lhs[lane] ^= rhs[lane];
                        }
                        set_neon_register(Vd, lhs);
                    } else {
                        // veor Dd, Dn, Dm
                        uint64_t lhs, rhs;
                        get_d_register(Vn, &lhs);
                        get_d_register(Vm, &rhs);
                        lhs ^= rhs;
                        set_d_register(Vd, &lhs);
                    }
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0x2: {
                // Only the encoding with bit 4 set is simulated here.
                if (instr->Bit(4) != 1) {
                    UNIMPLEMENTED();
                    break;
                }
                // vqsub.u<size> Qd, Qm, Qn.
                // Bits 21:20 select the unsigned lane type handed to SubSaturate.
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    SubSaturate<uint8_t>(this, Vd, Vm, Vn);
                } else if (lane_size == Neon16) {
                    SubSaturate<uint16_t>(this, Vd, Vm, Vn);
                } else if (lane_size == Neon32) {
                    SubSaturate<uint32_t>(this, Vd, Vm, Vn);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0x3: {
                // vcge/vcgt.u<size> Qd, Qm, Qn.
                // Bit 4 set means "greater than or equal"; clear means strictly
                // greater. Bits 21:20 give the unsigned lane width.
                const bool or_equal = instr->Bit(4) == 1;
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, or_equal);
                } else if (lane_size == Neon16) {
                    CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, or_equal);
                } else if (lane_size == Neon32) {
                    CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, or_equal);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0x6: {
                // vmin/vmax.u<size> Qd, Qm, Qn.
                // Bit 4 set selects the minimum, clear the maximum; bits 21:20 give
                // the unsigned lane width.
                const bool want_min = instr->Bit(4) != 0;
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, want_min);
                } else if (lane_size == Neon16) {
                    MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, want_min);
                } else if (lane_size == Neon32) {
                    MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, want_min);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0x8: {
                // Two instructions share this opcode and are told apart by bit 4:
                //   bit 4 clear: vsub.size Qd, Qm, Qn.
                //   bit 4 set:   vceq.size Qd, Qm, Qn.
                // Bits 21:20 give the lane width for both.
                const bool is_compare = instr->Bit(4) != 0;
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    if (is_compare) {
                        CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
                    } else {
                        Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
                    }
                } else if (lane_size == Neon16) {
                    if (is_compare) {
                        CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
                    } else {
                        Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
                    }
                } else if (lane_size == Neon32) {
                    if (is_compare) {
                        CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
                    } else {
                        Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
                    }
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0xA: {
                // vpmin/vpmax.u<size> Dd, Dm, Dn.
                // Bit 4 set selects the pairwise minimum, clear the pairwise maximum;
                // bits 21:20 give the unsigned lane width.
                const bool want_min = instr->Bit(4) != 0;
                const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(21, 20));
                if (lane_size == Neon8) {
                    PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, want_min);
                } else if (lane_size == Neon16) {
                    PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, want_min);
                } else if (lane_size == Neon32) {
                    PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, want_min);
                } else {
                    UNREACHABLE();
                }
                break;
            }
            case 0xD: {
                // Both forms handled here need bits 21:20 == 0; bits 6 and 4 then pick
                // the instruction.
                const bool f32_form = instr->Bits(21, 20) == 0;
                const int bit6 = instr->Bit(6);
                const int bit4 = instr->Bit(4);
                if (f32_form && bit6 == 1 && bit4 == 1) {
                    // vmul.f32 Qd, Qn, Qm
                    float lhs[4], rhs[4];
                    get_neon_register(Vn, lhs);
                    get_neon_register(Vm, rhs);
                    for (int lane = 0; lane < 4; lane++) {
                        const float product = lhs[lane] * rhs[lane];
                        lhs[lane] = product;
                    }
                    set_neon_register(Vd, lhs);
                } else if (f32_form && bit6 == 0 && bit4 == 0) {
                    // vpadd.f32 Dd, Dn, Dm
                    PairwiseAdd<float>(this, Vd, Vm, Vn);
                } else {
                    UNIMPLEMENTED();
                }
                break;
            }
            case 0xE: {
                // Only the encoding with bits 20 and 4 both clear is simulated.
                if (instr->Bit(20) != 0 || instr->Bit(4) != 0) {
                    UNIMPLEMENTED();
                    break;
                }
                // vcge/vcgt.f32 Qd, Qm, Qn
                // Bit 21 clear means ">=", set means ">". Each result lane becomes
                // all ones when the comparison holds and zero otherwise.
                const bool or_equal = instr->Bit(21) == 0;
                float lhs[4], rhs[4];
                get_neon_register(Vn, lhs);
                get_neon_register(Vm, rhs);
                uint32_t lane_mask[4];
                for (int lane = 0; lane < 4; lane++) {
                    const bool holds = or_equal ? (lhs[lane] >= rhs[lane])
                                                : (lhs[lane] > rhs[lane]);
                    lane_mask[lane] = holds ? 0xFFFFFFFFu : 0;
                }
                set_neon_register(Vd, lane_mask);
                break;
            }
            default:
                UNREACHABLE();
                break;
            }
            break;
        }
        case 7:
            if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && (instr->Bit(4) == 1)) {
                // vmovl unsigned
                if ((instr->VdValue() & 1) != 0)
                    UNIMPLEMENTED();
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kDoublePrecision);
                int imm3 = instr->Bits(21, 19);
                switch (imm3) {
                case 1:
                    Widen<uint8_t, uint16_t>(this, Vd, Vm);
                    break;
                case 2:
                    Widen<uint16_t, uint32_t>(this, Vd, Vm);
                    break;
                case 4:
                    Widen<uint32_t, uint64_t>(this, Vd, Vm);
                    break;
                default:
                    UNIMPLEMENTED();
                    break;
                }
            } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
                if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1) {
                    // vcvt.<Td>.<Tm> Qd, Qm.
                    // Converts each of the four 32-bit lanes of Qm in place and writes
                    // the result to Qd. Bits 8:7 (op) pick the direction and signedness.
                    int Vd = instr->VFPDRegValue(kSimd128Precision);
                    int Vm = instr->VFPMRegValue(kSimd128Precision);
                    uint32_t q_data[4];
                    get_neon_register(Vm, q_data);
                    int op = instr->Bits(8, 7);
                    for (int i = 0; i < 4; i++) {
                        // op is a two-bit field, so the four cases below are exhaustive.
                        switch (op) {
                        case 0:
                            // f32 <- s32, round towards nearest.
                            // The lane bits are reinterpreted as int32_t before the
                            // cast. The float produced by the cast is already
                            // integral, so std::round leaves it unchanged.
                            q_data[i] = bit_cast<uint32_t>(std::round(
                                static_cast<float>(bit_cast<int32_t>(q_data[i]))));
                            break;
                        case 1:
                            // f32 <- u32, round towards nearest.
                            // Same as above, but the lane is treated as unsigned.
                            q_data[i] = bit_cast<uint32_t>(
                                std::round(static_cast<float>(q_data[i])));
                            break;
                        case 2:
                            // s32 <- f32, round to zero.
                            // NOTE(review): the 'false' argument presumably requests a
                            // signed result -- confirm against ConvertDoubleToInt.
                            q_data[i] = static_cast<uint32_t>(
                                ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
                            break;
                        case 3:
                            // u32 <- f32, round to zero.
                            // NOTE(review): the 'true' argument presumably requests an
                            // unsigned result -- confirm against ConvertDoubleToInt.
                            q_data[i] = static_cast<uint32_t>(
                                ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
                            break;
                        }
                    }
                    set_neon_register(Vd, q_data);
                } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
                    if (instr->Bit(6) != 0) {
                        // vswp Qd, Qm.
                        // Read both registers first, then write each value to the
                        // other register.
                        uint32_t old_d[4], old_m[4];
                        int reg_d = instr->VFPDRegValue(kSimd128Precision);
                        int reg_m = instr->VFPMRegValue(kSimd128Precision);
                        get_neon_register(reg_d, old_d);
                        get_neon_register(reg_m, old_m);
                        set_neon_register(reg_m, old_d);
                        set_neon_register(reg_d, old_m);
                    } else {
                        // vswp Dd, Dm.
                        // Same exchange on a pair of 64-bit D registers.
                        uint64_t old_d, old_m;
                        int reg_d = instr->VFPDRegValue(kDoublePrecision);
                        int reg_m = instr->VFPMRegValue(kDoublePrecision);
                        get_d_register(reg_d, &old_d);
                        get_d_register(reg_m, &old_m);
                        set_d_register(reg_m, &old_d);
                        set_d_register(reg_d, &old_m);
                    }
                } else if (instr->Bits(11, 7) == 0x18) {
                    // vdup.<size> Dd, Dm[index].
                    // vdup.<size> Qd, Dm[index].
                    // Replicates one lane of Dm across every lane of the destination.
                    // imm4 (bits 19:16) encodes the lane width in its lowest set bit and
                    // the lane index in the bits above it.
                    int vm = instr->VFPMRegValue(kDoublePrecision);
                    int imm4 = instr->Bits(19, 16);
                    int size = 0, index = 0;
                    // Kept unsigned so the 32-bit all-ones mask is stored exactly instead
                    // of going through an implementation-defined conversion to int.
                    uint32_t mask = 0;
                    if ((imm4 & 0x1) != 0) {
                        size = 8;
                        index = imm4 >> 1;
                        mask = 0xFFu;
                    } else if ((imm4 & 0x2) != 0) {
                        size = 16;
                        index = imm4 >> 2;
                        mask = 0xFFFFu;
                    } else {
                        size = 32;
                        index = imm4 >> 3;
                        mask = 0xFFFFFFFFu;
                    }
                    uint64_t d_data;
                    get_d_register(vm, &d_data);
                    // Pull the selected lane down to bit 0 and drop everything above it.
                    uint32_t scalar = static_cast<uint32_t>(d_data >> (size * index)) & mask;
                    // Tile the lane across a 32-bit word (4x for bytes, 2x for
                    // halfwords, no iterations for words).
                    uint32_t duped = scalar;
                    for (int i = 1; i < 32 / size; i++) {
                        scalar <<= size;
                        duped |= scalar;
                    }
                    uint32_t result[4] = { duped, duped, duped, duped };
                    if (instr->Bit(6) == 0) {
                        int vd = instr->VFPDRegValue(kDoublePrecision);
                        set_d_register(vd, result);
                    } else {
                        int vd = instr->VFPDRegValue(kSimd128Precision);
                        set_neon_register(vd, result);
                    }
                } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
                    // vmvn Qd, Qm.
                    int vd = instr->VFPDRegValue(kSimd128Precision);
                    int vm = instr->VFPMRegValue(kSimd128Precision);
                    uint32_t q_data[4];
                    get_neon_register(vm, q_data);
                    for (int i = 0; i < 4; i++)
                        q_data[i] = ~q_data[i];
                    set_neon_register(vd, q_data);
                } else if (instr->Bits(11, 10) == 0x2) {
                    // vtb[l,x] Dd, <list>, Dm.
                    // Byte-wise table lookup: each byte of Dm is an index into a table
                    // formed by consecutive D registers starting at Dn. The looked-up
                    // bytes are assembled into Dd.
                    int vd = instr->VFPDRegValue(kDoublePrecision);
                    int vn = instr->VFPNRegValue(kDoublePrecision);
                    int vm = instr->VFPMRegValue(kDoublePrecision);
                    // Bits 9:8 hold (number of table registers - 1); the table length is
                    // expressed in bytes.
                    int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
                    bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
                    uint64_t destination = 0, indices = 0, result = 0;
                    get_d_register(vd, &destination);
                    get_d_register(vm, &indices);
                    for (int i = 0; i < kDoubleSize; i++) {
                        // Bit offset of byte i inside the 64-bit registers.
                        int shift = i * kBitsPerByte;
                        int index = (indices >> shift) & 0xFF;
                        if (index < table_len) {
                            // In range: fetch the table register that holds this index
                            // and move the addressed byte into position i of the result.
                            uint64_t table;
                            get_d_register(vn + index / kDoubleSize, &table);
                            result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
                                << shift;
                        } else if (vtbx) {
                            // Out of range, vtbx: keep the original destination byte.
                            result |= destination & (0xFFull << shift);
                        }
                        // Out of range, vtbl: the byte stays zero (result starts at 0).
                    }
                    set_d_register(vd, &result);
                } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
                    NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
                    if (instr->Bit(6) == 0) {
                        int Vd = instr->VFPDRegValue(kDoublePrecision);
                        int Vm = instr->VFPMRegValue(kDoublePrecision);
                        if (instr->Bit(7) == 1) {
                            // vzip.<size> Dd, Dm.
                            switch (size) {
                            case Neon8:
                                Zip<uint8_t, kDoubleSize>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Zip<uint16_t, kDoubleSize>(this, Vd, Vm);
                                break;
                            case Neon32:
                                UNIMPLEMENTED();
                                break;
                            default:
                                UNREACHABLE();
                                break;
                            }
                        } else {
                            // vuzp.<size> Dd, Dm.
                            switch (size) {
                            case Neon8:
                                Unzip<uint8_t, kDoubleSize>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Unzip<uint16_t, kDoubleSize>(this, Vd, Vm);
                                break;
                            case Neon32:
                                UNIMPLEMENTED();
                                break;
                            default:
                                UNREACHABLE();
                                break;
                            }
                        }
                    } else {
                        int Vd = instr->VFPDRegValue(kSimd128Precision);
                        int Vm = instr->VFPMRegValue(kSimd128Precision);
                        if (instr->Bit(7) == 1) {
                            // vzip.<size> Qd, Qm.
                            switch (size) {
                            case Neon8:
                                Zip<uint8_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Zip<uint16_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon32:
                                Zip<uint32_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            default:
                                UNREACHABLE();
                                break;
                            }
                        } else {
                            // vuzp.<size> Qd, Qm.
                            switch (size) {
                            case Neon8:
                                Unzip<uint8_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Unzip<uint16_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon32:
                                Unzip<uint32_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            default:
                                UNREACHABLE();
                                break;
                            }
                        }
                    }
                } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
                    // vrev<op>.size Qd, Qm
                    // Reverses the order of 'size'-wide elements inside every 'op'-wide
                    // group of the source register.
                    int Vd = instr->VFPDRegValue(kSimd128Precision);
                    int Vm = instr->VFPMRegValue(kSimd128Precision);
                    NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
                    // Bits 8:7 count down from the widest group: 0 -> Neon64 (vrev64),
                    // 1 -> Neon32 (vrev32), 2 -> Neon16 (vrev16).
                    NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - instr->Bits(8, 7));
                    switch (op) {
                    case Neon16: {
                        // vrev16: only byte elements fit in a 16-bit group, so swap each
                        // adjacent byte pair.
                        DCHECK_EQ(Neon8, size);
                        uint8_t src[16];
                        get_neon_register(Vm, src);
                        for (int i = 0; i < 16; i += 2) {
                            std::swap(src[i], src[i + 1]);
                        }
                        set_neon_register(Vd, src);
                        break;
                    }
                    case Neon32: {
                        // vrev32: reverse halfwords or bytes within each 32-bit word.
                        switch (size) {
                        case Neon16: {
                            // Two halfwords per word: swap each adjacent pair.
                            uint16_t src[8];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 8; i += 2) {
                                std::swap(src[i], src[i + 1]);
                            }
                            set_neon_register(Vd, src);
                            break;
                        }
                        case Neon8: {
                            // Four bytes per word: swap outer pair, then inner pair.
                            uint8_t src[16];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 4; i++) {
                                std::swap(src[i * 4], src[i * 4 + 3]);
                                std::swap(src[i * 4 + 1], src[i * 4 + 2]);
                            }
                            set_neon_register(Vd, src);
                            break;
                        }
                        default:
                            UNREACHABLE();
                            break;
                        }
                        break;
                    }
                    case Neon64: {
                        // vrev64: reverse words, halfwords or bytes within each 64-bit
                        // half of the register.
                        switch (size) {
                        case Neon32: {
                            // Two words per half: swap them.
                            uint32_t src[4];
                            get_neon_register(Vm, src);
                            std::swap(src[0], src[1]);
                            std::swap(src[2], src[3]);
                            set_neon_register(Vd, src);
                            break;
                        }
                        case Neon16: {
                            // Four halfwords per half: swap outer pair, then inner pair.
                            uint16_t src[8];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 2; i++) {
                                std::swap(src[i * 4], src[i * 4 + 3]);
                                std::swap(src[i * 4 + 1], src[i * 4 + 2]);
                            }
                            set_neon_register(Vd, src);
                            break;
                        }
                        case Neon8: {
                            // Eight bytes per half: mirror bytes 0..7 and bytes 8..15
                            // in the same loop.
                            uint8_t src[16];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 4; i++) {
                                std::swap(src[i], src[7 - i]);
                                std::swap(src[i + 8], src[15 - i]);
                            }
                            set_neon_register(Vd, src);
                            break;
                        }
                        default:
                            UNREACHABLE();
                            break;
                        }
                        break;
                    }
                    default:
                        UNREACHABLE();
                        break;
                    }
                } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
                    // vtrn.<size> on either a D pair or a Q pair.
                    // Bits 19:18 give the lane width; bit 6 picks Q registers (set) or
                    // D registers (clear).
                    const NeonSize lane_size = static_cast<NeonSize>(instr->Bits(19, 18));
                    if (instr->Bit(6) != 0) {
                        // vtrn.<size> Qd, Qm.
                        int Vd = instr->VFPDRegValue(kSimd128Precision);
                        int Vm = instr->VFPMRegValue(kSimd128Precision);
                        if (lane_size == Neon8) {
                            Transpose<uint8_t, kSimd128Size>(this, Vd, Vm);
                        } else if (lane_size == Neon16) {
                            Transpose<uint16_t, kSimd128Size>(this, Vd, Vm);
                        } else if (lane_size == Neon32) {
                            Transpose<uint32_t, kSimd128Size>(this, Vd, Vm);
                        } else {
                            UNREACHABLE();
                        }
                    } else {
                        // vtrn.<size> Dd, Dm.
                        int Vd = instr->VFPDRegValue(kDoublePrecision);
                        int Vm = instr->VFPMRegValue(kDoublePrecision);
                        if (lane_size == Neon8) {
                            Transpose<uint8_t, kDoubleSize>(this, Vd, Vm);
                        } else if (lane_size == Neon16) {
                            Transpose<uint16_t, kDoubleSize>(this, Vd, Vm);
                        } else if (lane_size == Neon32) {
                            Transpose<uint32_t, kDoubleSize>(this, Vd, Vm);
                        } else {
                            UNREACHABLE();
                        }
                    }
                } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
                    // vabs / vneg on Q registers. Bits 9:6 pick the operation, bit 10
                    // picks floating point (set) or signed integer (clear), and bits
                    // 19:18 give the integer lane width.
                    int Vd = instr->VFPDRegValue(kSimd128Precision);
                    int Vm = instr->VFPMRegValue(kSimd128Precision);
                    NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
                    if (instr->Bits(9, 6) == 0xD) {
                        // vabs<type>.<size> Qd, Qm
                        if (instr->Bit(10) != 0) {
                            // floating point (clear sign bits)
                            // Done on the raw 32-bit lane patterns, so no float
                            // arithmetic is involved.
                            uint32_t src[4];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 4; i++) {
                                src[i] &= ~0x80000000;
                            }
                            set_neon_register(Vd, src);
                        } else {
                            // signed integer
                            switch (size) {
                            case Neon8:
                                Abs<int8_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Abs<int16_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon32:
                                Abs<int32_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            default:
                                UNIMPLEMENTED();
                                break;
                            }
                        }
                    } else if (instr->Bits(9, 6) == 0xF) {
                        // vneg<type>.<size> Qd, Qm (floating point or signed integer)
                        if (instr->Bit(10) != 0) {
                            // floating point (toggle sign bits)
                            // Done on the raw 32-bit lane patterns, so no float
                            // arithmetic is involved.
                            uint32_t src[4];
                            get_neon_register(Vm, src);
                            for (int i = 0; i < 4; i++) {
                                src[i] ^= 0x80000000;
                            }
                            set_neon_register(Vd, src);
                        } else {
                            // signed integer
                            switch (size) {
                            case Neon8:
                                Neg<int8_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon16:
                                Neg<int16_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            case Neon32:
                                Neg<int32_t, kSimd128Size>(this, Vd, Vm);
                                break;
                            default:
                                UNIMPLEMENTED();
                                break;
                            }
                        }
                    } else {
                        UNIMPLEMENTED();
                    }
                } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) {
                    // vrecpe/vrsqrte.f32 Qd, Qm.
                    // Bit 7 clear: per-lane reciprocal (1 / x).
                    // Bit 7 set:   per-lane reciprocal square root (1 / sqrt(x)).
                    // Both are computed with ordinary float arithmetic and the result
                    // is passed through canonicalizeNaN before being stored.
                    int Vd = instr->VFPDRegValue(kSimd128Precision);
                    int Vm = instr->VFPMRegValue(kSimd128Precision);
                    uint32_t src[4];
                    get_neon_register(Vm, src);
                    if (instr->Bit(7) == 0) {
                        // Collect the divide-by-zero condition over all four lanes.
                        // Assigning the flag inside the loop would let the last lane
                        // overwrite a zero denominator seen in an earlier lane.
                        bool any_zero_denom = false;
                        for (int i = 0; i < 4; i++) {
                            float denom = bit_cast<float>(src[i]);
                            any_zero_denom = any_zero_denom || (denom == 0);
                            float result = 1.0f / denom;
                            result = canonicalizeNaN(result);
                            src[i] = bit_cast<uint32_t>(result);
                        }
                        div_zero_vfp_flag_ = any_zero_denom;
                    } else {
                        // This path leaves div_zero_vfp_flag_ untouched.
                        for (int i = 0; i < 4; i++) {
                            float radicand = bit_cast<float>(src[i]);
                            float result = 1.0f / std::sqrt(radicand);
                            result = canonicalizeNaN(result);
                            src[i] = bit_cast<uint32_t>(result);
                        }
                    }
                    set_neon_register(Vd, src);
                } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && instr->Bits(7, 6) != 0) {
                    // vqmovn.<type><size> Dd, Qm.
                    int Vd = instr->VFPDRegValue(kDoublePrecision);
                    int Vm = instr->VFPMRegValue(kSimd128Precision);
                    NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
                    bool is_unsigned = instr->Bit(6) != 0;
                    switch (size) {
                    case Neon8: {
                        if (is_unsigned) {
                            SaturatingNarrow<uint16_t, uint8_t>(this, Vd, Vm);
                        } else {
                            SaturatingNarrow<int16_t, int8_t>(this, Vd, Vm);
                        }
                        break;
                    }
                    case Neon16: {
                        if (is_unsigned) {
                            SaturatingNarrow<uint32_t, uint16_t>(this, Vd, Vm);
                        } else {
                            SaturatingNarrow<int32_t, int16_t>(this, Vd, Vm);
                        }
                        break;
                    }
                    case Neon32: {
                        if (is_unsigned) {
                            SaturatingNarrow<uint64_t, uint32_t>(this, Vd, Vm);
                        } else {
                            SaturatingNarrow<int64_t, int32_t>(this, Vd, Vm);
                        }
                        break;
                    }
                    default:
                        UNIMPLEMENTED();
                        break;
                    }
                } else {
                    UNIMPLEMENTED();
                }
            } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
                // vshr.u<size> Qd, Qm, shift
                int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
                int shift = 2 * size - instr->Bits(21, 16);
                int Vd = instr->VFPDRegValue(kSimd128Precision);
                int Vm = instr->VFPMRegValue(kSimd128Precision);
                NeonSize ns = static_cast<NeonSize>(size / 16);
                switch (ns) {
                case Neon8:
                    ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon16:
                    ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                case Neon32:
                    ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
            } else if (instr->Bits(11, 8) == 0x5 && instr->Bit(6) == 0 && instr->Bit(4) == 1) {
                // vsli.<size> Dd, Dm, shift
                int imm7 = instr->Bits(21, 16);
                if (instr->Bit(7) != 0)
                    imm7 += 64;
                int size = base::bits::RoundDownToPowerOfTwo32(imm7);
                int shift = imm7 - size;
                int Vd = instr->VFPDRegValue(kDoublePrecision);
                int Vm = instr->VFPMRegValue(kDoublePrecision);
                switch (size) {
                case 8:
                    ShiftLeftAndInsert<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 16:
                    ShiftLeftAndInsert<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 32:
                    ShiftLeftAndInsert<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 64:
                    ShiftLeftAndInsert<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
            } else if (instr->Bits(11, 8) == 0x4 && instr->Bit(6) == 0 && instr->Bit(4) == 1) {
                // vsri.<size> Dd, Dm, shift
                int imm7 = instr->Bits(21, 16);
                if (instr->Bit(7) != 0)
                    imm7 += 64;
                int size = base::bits::RoundDownToPowerOfTwo32(imm7);
                int shift = 2 * size - imm7;
                int Vd = instr->VFPDRegValue(kDoublePrecision);
                int Vm = instr->VFPMRegValue(kDoublePrecision);
                switch (size) {
                case 8:
                    ShiftRightAndInsert<uint8_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 16:
                    ShiftRightAndInsert<uint16_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 32:
                    ShiftRightAndInsert<uint32_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                case 64:
                    ShiftRightAndInsert<uint64_t, kDoubleSize>(this, Vd, Vm, shift);
                    break;
                default:
                    UNREACHABLE();
                    break;
                }
            } else {
                UNIMPLEMENTED();
            }
            break;
        case 8:
            if (instr->Bits(21, 20) == 0) {
                // vst1
                int Vd = (instr->Bit(22) << 4) | instr->VdValue();
                int Rn = instr->VnValue();
                int type = instr->Bits(11, 8);
                int Rm = instr->VmValue();
                int32_t address = get_register(Rn);
                int regs = 0;
                switch (type) {
                case nlt_1:
                    regs = 1;
                    break;
                case nlt_2:
                    regs = 2;
                    break;
                case nlt_3:
                    regs = 3;
                    break;
                case nlt_4:
                    regs = 4;
                    break;
                default:
                    UNIMPLEMENTED();
                    break;
                }
                int r = 0;
                while (r < regs) {
                    uint32_t data[2];
                    get_d_register(Vd + r, data);
                    WriteW(address, data[0]);
                    WriteW(address + 4, data[1]);
                    address += 8;
                    r++;
                }
                if (Rm != 15) {
                    if (Rm == 13) {
                        set_register(Rn, address);
                    } else {
                        set_register(Rn, get_register(Rn) + get_register(Rm));
                    }
                }
            } else if (instr->Bits(21, 20) == 2) {
                // vld1
                int Vd = (instr->Bit(22) << 4) | instr->VdValue();
                int Rn = instr->VnValue();
                int type = instr->Bits(11, 8);
                int Rm = instr->VmValue();
                int32_t address = get_register(Rn);
                int regs = 0;
                switch (type) {
                case nlt_1:
                    regs = 1;
                    break;
                case nlt_2:
                    regs = 2;
                    break;
                case nlt_3:
                    regs = 3;
                    break;
                case nlt_4:
                    regs = 4;
                    break;
                default:
                    UNIMPLEMENTED();
                    break;
                }
                int r = 0;
                while (r < regs) {
                    uint32_t data[2];
                    data[0] = ReadW(address);
                    data[1] = ReadW(address + 4);
                    set_d_register(Vd + r, data);
                    address += 8;
                    r++;
                }
                if (Rm != 15) {
                    if (Rm == 13) {
                        set_register(Rn, address);
                    } else {
                        set_register(Rn, get_register(Rn) + get_register(Rm));
                    }
                }
            } else {
                UNIMPLEMENTED();
            }
            break;
        case 0xA:
        case 0xB:
            if ((instr->Bits(22, 20) == 5) && (instr->Bits(15, 12) == 0xF)) {
                // pld: ignore instruction.
            } else if (instr->SpecialValue() == 0xA && instr->Bits(22, 20) == 7) {
                // dsb, dmb, isb: ignore instruction for now.
                // TODO(binji): implement
                // Also refer to the ARMv6 CP15 equivalents in DecodeTypeCP15.
            } else {
                UNIMPLEMENTED();
            }
            break;
        case 0x1D:
            if (instr->Opc1Value() == 0x7 && instr->Opc3Value() == 0x1 && instr->Bits(11, 9) == 0x5 && instr->Bits(19, 18) == 0x2) {
                if (instr->SzValue() == 0x1) {
                    int vm = instr->VFPMRegValue(kDoublePrecision);
                    int vd = instr->VFPDRegValue(kDoublePrecision);
                    double dm_value = get_double_from_d_register(vm).get_scalar();
                    double dd_value = 0.0;
                    int rounding_mode = instr->Bits(17, 16);
                    switch (rounding_mode) {
                    case 0x0: // vrinta - round with ties to away from zero
                        dd_value = round(dm_value);
                        break;
                    case 0x1: { // vrintn - round with ties to even
                        dd_value = nearbyint(dm_value);
                        break;
                    }
                    case 0x2: // vrintp - ceil
                        dd_value = ceil(dm_value);
                        break;
                    case 0x3: // vrintm - floor
                        dd_value = floor(dm_value);
                        break;
                    default:
                        UNREACHABLE(); // Case analysis is exhaustive.
                        break;
                    }
                    dd_value = canonicalizeNaN(dd_value);
                    set_d_register_from_double(vd, dd_value);
                } else {
                    int m = instr->VFPMRegValue(kSinglePrecision);
                    int d = instr->VFPDRegValue(kSinglePrecision);
                    float sm_value = get_float_from_s_register(m).get_scalar();
                    float sd_value = 0.0;
                    int rounding_mode = instr->Bits(17, 16);
                    switch (rounding_mode) {
                    case 0x0: // vrinta - round with ties to away from zero
                        sd_value = roundf(sm_value);
                        break;
                    case 0x1: { // vrintn - round with ties to even
                        sd_value = nearbyintf(sm_value);
                        break;
                    }
                    case 0x2: // vrintp - ceil
                        sd_value = ceilf(sm_value);
                        break;
                    case 0x3: // vrintm - floor
                        sd_value = floorf(sm_value);
                        break;
                    default:
                        UNREACHABLE(); // Case analysis is exhaustive.
                        break;
                    }
                    sd_value = canonicalizeNaN(sd_value);
                    set_s_register_from_float(d, sd_value);
                }
            } else if ((instr->Opc1Value() == 0x4) && (instr->Bits(11, 9) == 0x5) && (instr->Bit(4) == 0x0)) {
                if (instr->SzValue() == 0x1) {
                    int m = instr->VFPMRegValue(kDoublePrecision);
                    int n = instr->VFPNRegValue(kDoublePrecision);
                    int d = instr->VFPDRegValue(kDoublePrecision);
                    double dn_value = get_double_from_d_register(n).get_scalar();
                    double dm_value = get_double_from_d_register(m).get_scalar();
                    double dd_value;
                    if (instr->Bit(6) == 0x1) { // vminnm
                        if ((dn_value < dm_value) || /*std::*/isnan(dm_value)) {
                            dd_value = dn_value;
                        } else if ((dm_value < dn_value) || /*std::*/isnan(dn_value)) {
                            dd_value = dm_value;
                        } else {
                            DCHECK_EQ(dn_value, dm_value);
                            // Make sure that we pick the most negative sign for +/-0.
                            dd_value = /*std::*/signbit(dn_value) ? dn_value : dm_value;
                        }
                    } else { // vmaxnm
                        if ((dn_value > dm_value) || /*std::*/isnan(dm_value)) {
                            dd_value = dn_value;
                        } else if ((dm_value > dn_value) || /*std::*/isnan(dn_value)) {
                            dd_value = dm_value;
                        } else {
                            DCHECK_EQ(dn_value, dm_value);
                            // Make sure that we pick the most positive sign for +/-0.
                            dd_value = /*std::*/signbit(dn_value) ? dm_value : dn_value;
                        }
                    }
                    dd_value = canonicalizeNaN(dd_value);
                    set_d_register_from_double(d, dd_value);
                } else {
                    int m = instr->VFPMRegValue(kSinglePrecision);
                    int n = instr->VFPNRegValue(kSinglePrecision);
                    int d = instr->VFPDRegValue(kSinglePrecision);
                    float sn_value = get_float_from_s_register(n).get_scalar();
                    float sm_value = get_float_from_s_register(m).get_scalar();
                    float sd_value;
                    if (instr->Bit(6) == 0x1) { // vminnm
                        if ((sn_value < sm_value) || /*std::*/isnan(sm_value)) {
                            sd_value = sn_value;
                        } else if ((sm_value < sn_value) || /*std::*/isnan(sn_value)) {
                            sd_value = sm_value;
                        } else {
                            DCHECK_EQ(sn_value, sm_value);
                            // Make sure that we pick the most negative sign for +/-0.
                            sd_value = /*std::*/signbit(sn_value) ? sn_value : sm_value;
                        }
                    } else { // vmaxnm
                        if ((sn_value > sm_value) || /*std::*/isnan(sm_value)) {
                            sd_value = sn_value;
                        } else if ((sm_value > sn_value) || /*std::*/isnan(sn_value)) {
                            sd_value = sm_value;
                        } else {
                            DCHECK_EQ(sn_value, sm_value);
                            // Make sure that we pick the most positive sign for +/-0.
                            sd_value = /*std::*/signbit(sn_value) ? sm_value : sn_value;
                        }
                    }
                    sd_value = canonicalizeNaN(sd_value);
                    set_s_register_from_float(d, sd_value);
                }
            } else {
                UNIMPLEMENTED();
            }
            break;
        case 0x1C:
            if ((instr->Bits(11, 9) == 0x5) && (instr->Bit(6) == 0) && (instr->Bit(4) == 0)) {
                // VSEL* (floating-point)
                bool condition_holds;
                switch (instr->Bits(21, 20)) {
                case 0x0: // VSELEQ
                    condition_holds = (z_flag_ == 1);
                    break;
                case 0x1: // VSELVS
                    condition_holds = (v_flag_ == 1);
                    break;
                case 0x2: // VSELGE
                    condition_holds = (n_flag_ == v_flag_);
                    break;
                case 0x3: // VSELGT
                    condition_holds = ((z_flag_ == 0) && (n_flag_ == v_flag_));
                    break;
                default:
                    UNREACHABLE(); // Case analysis is exhaustive.
                    break;
                }
                if (instr->SzValue() == 0x1) {
                    int n = instr->VFPNRegValue(kDoublePrecision);
                    int m = instr->VFPMRegValue(kDoublePrecision);
                    int d = instr->VFPDRegValue(kDoublePrecision);
                    Float64 result = get_double_from_d_register(condition_holds ? n : m);
                    set_d_register_from_double(d, result);
                } else {
                    int n = instr->VFPNRegValue(kSinglePrecision);
                    int m = instr->VFPMRegValue(kSinglePrecision);
                    int d = instr->VFPDRegValue(kSinglePrecision);
                    Float32 result = get_float_from_s_register(condition_holds ? n : m);
                    set_s_register_from_float(d, result);
                }
            } else {
                UNIMPLEMENTED();
            }
            break;
        default:
            UNIMPLEMENTED();
            break;
        }
    }

    // Decodes and executes the single instruction at |instr|. When the
    // corresponding flags are set, the instruction cache is checked and a
    // disassembly of the instruction is printed first. Afterwards the pc is
    // stepped to the following instruction unless pc_modified_ has been set.
    void Simulator::InstructionDecode(Instruction* instr)
    {
        if (v8::internal::FLAG_check_icache) {
            CheckICache(i_cache(), instr);
        }

        // Reset up front so the check at the bottom only reflects what happened
        // from this point on.
        pc_modified_ = false;

        if (::v8::internal::FLAG_trace_sim) {
            disasm::NameConverter name_converter;
            disasm::Disassembler disassembler(name_converter);
            // Scratch space for the disassembled text; reasonably large.
            v8::internal::EmbeddedVector<char, 256> text;
            byte* const raw_instr = reinterpret_cast<byte*>(instr);
            disassembler.InstructionDecode(text, raw_instr);
            PrintF("  0x%08" V8PRIxPTR "  %s\n", reinterpret_cast<intptr_t>(instr),
                text.start());
        }

        if (instr->ConditionField() != kSpecialCondition) {
            // Ordinary instruction: dispatch on its type, but only when its
            // condition allows it to execute.
            if (ConditionallyExecute(instr)) {
                switch (instr->TypeValue()) {
                case 0:
                case 1:
                    DecodeType01(instr);
                    break;
                case 2:
                    DecodeType2(instr);
                    break;
                case 3:
                    DecodeType3(instr);
                    break;
                case 4:
                    DecodeType4(instr);
                    break;
                case 5:
                    DecodeType5(instr);
                    break;
                case 6:
                    DecodeType6(instr);
                    break;
                case 7:
                    DecodeType7(instr);
                    break;
                default:
                    UNIMPLEMENTED();
                    break;
                }
            }
        } else {
            // Instructions carrying the special condition have their own decoder.
            DecodeSpecialCondition(instr);
        }

        if (pc_modified_) {
            return;
        }
        // Nothing wrote the pc: fall through to the next instruction.
        set_register(pc, reinterpret_cast<int32_t>(instr) + kInstrSize);
    }

    void Simulator::Execute()
    {
        // Get the PC to simulate. Cannot use the accessor here as we need the
        // raw PC value and not the one used as input to arithmetic instructions.
        int program_counter = get_pc();

        if (::v8::internal::FLAG_stop_sim_at == 0) {
            // Fast version of the dispatch loop without checking whether the simulator
            // should be stopping at a particular executed instruction.
            while (program_counter != end_sim_pc) {
                Instruction* instr = reinterpret_cast<Instruction*>(program_counter);
                icount_++;
                InstructionDecode(instr);
                program_counter = get_pc();
            }
        } else {
            // FLAG_stop_sim_at is at the non-default value. Stop in the debugger when
            // we reach the particular instruction count.
            while (program_counter != end_sim_pc) {
                Instruction* instr = reinterpret_cast<Instruction*>(program_counter);
                icount_++;
                if (icount_ == ::v8::internal::FLAG_stop_sim_at) {
                    ArmDebugger dbg(this);
                    dbg.Debug();
                } else {
                    InstructionDecode(instr);
                }
                program_counter = get_pc();
            }
        }
    }

    // Runs the code at |entry| until the pc reaches end_sim_pc, and checks that
    // registers r4-r11 hold the same value afterwards as they were given before
    // the run. The values those registers had on entry are put back at the end.
    void Simulator::CallInternal(Address entry)
    {
        // Registers treated as callee-saved. r9 is included on the assumption
        // that simulator code does not use it as sb (static base).
        const int preserved_regs[] = { r4, r5, r6, r7, r8, r9, r10, r11 };
        const int kPreservedCount = sizeof(preserved_regs) / sizeof(preserved_regs[0]);

        // Adjust JS-based stack limit to C-based stack limit.
        isolate_->stack_guard()->AdjustStackLimitForSimulator();

        // Point the pc at the code to run, and plant the end-of-simulation
        // marker in lr so that returning to this call point stops the
        // simulation.
        set_register(pc, static_cast<int32_t>(entry));
        set_register(lr, end_sim_pc);

        // Stash the current contents of the callee-saved registers.
        int32_t original_values[kPreservedCount];
        for (int i = 0; i < kPreservedCount; i++) {
            original_values[i] = get_register(preserved_regs[i]);
        }

        // Fill them with a known value so that preservation can be verified.
        int32_t callee_saved_value = icount_;
        for (int i = 0; i < kPreservedCount; i++) {
            set_register(preserved_regs[i], callee_saved_value);
        }

        // Start the simulation.
        Execute();

        // Every callee-saved register must still hold the known value.
        CHECK_EQ(callee_saved_value, get_register(r4));
        CHECK_EQ(callee_saved_value, get_register(r5));
        CHECK_EQ(callee_saved_value, get_register(r6));
        CHECK_EQ(callee_saved_value, get_register(r7));
        CHECK_EQ(callee_saved_value, get_register(r8));
        CHECK_EQ(callee_saved_value, get_register(r9));
        CHECK_EQ(callee_saved_value, get_register(r10));
        CHECK_EQ(callee_saved_value, get_register(r11));

        // Put the stashed contents back.
        for (int i = 0; i < kPreservedCount; i++) {
            set_register(preserved_regs[i], original_values[i]);
        }
    }

    // Calls the code at |entry| with |argument_count| integer arguments read
    // from |arguments| and returns the contents of r0 afterwards.
    //
    // The first four arguments are placed in r0-r3; any further arguments are
    // copied onto the simulated stack from low to high memory. The stack
    // pointer is checked and restored to its original value before returning.
    intptr_t Simulator::CallImpl(Address entry, int argument_count,
        const intptr_t* arguments)
    {
        // First four arguments passed in registers.
        const int reg_arg_count = std::min(4, argument_count);
        if (reg_arg_count > 0)
            set_register(r0, arguments[0]);
        if (reg_arg_count > 1)
            set_register(r1, arguments[1]);
        if (reg_arg_count > 2)
            set_register(r2, arguments[2]);
        if (reg_arg_count > 3)
            set_register(r3, arguments[3]);

        // Remaining arguments passed on stack. The stack area is sized from the
        // number of arguments that actually spill. Sizing it from
        // (argument_count - 4) yields a negative count for calls with fewer than
        // four arguments, which placed the entry stack pointer above the
        // original one and could let the callee overwrite stack slots that are
        // still in use by the caller.
        const int stack_arg_count = argument_count - reg_arg_count;
        const int original_stack = get_register(sp);
        // Compute position of stack on entry to generated code.
        int entry_stack = original_stack
            - stack_arg_count * static_cast<int>(sizeof(int32_t));
        if (base::OS::ActivationFrameAlignment() != 0) {
            // Round down to the required activation frame alignment.
            entry_stack &= -base::OS::ActivationFrameAlignment();
        }
        // Store remaining arguments on stack, from low to high memory.
        memcpy(reinterpret_cast<intptr_t*>(entry_stack), arguments + reg_arg_count,
            stack_arg_count * sizeof(*arguments));
        set_register(sp, entry_stack);

        CallInternal(entry);

        // Pop stack passed arguments. The callee must have left sp where it
        // found it.
        CHECK_EQ(entry_stack, get_register(sp));
        set_register(sp, original_stack);

        return get_register(r0);
    }

    // Calls the code at |entry| with the two double arguments |d0| and |d1| and
    // returns the contents of r0 afterwards. Where the doubles are placed
    // depends on use_eabi_hardfloat().
    intptr_t Simulator::CallFPImpl(Address entry, double d0, double d1)
    {
        if (!use_eabi_hardfloat()) {
            // Not hard-float: hand the doubles over in the register pairs
            // starting at register 0 and register 2.
            set_register_pair_from_double(0, &d0);
            set_register_pair_from_double(2, &d1);
        } else {
            // Hard-float: hand the doubles over in d-registers 0 and 1.
            set_d_register_from_double(0, d0);
            set_d_register_from_double(1, d1);
        }

        CallInternal(entry);

        const intptr_t result = get_register(r0);
        return result;
    }

    // Pushes |address| onto the simulated stack: lowers sp by one slot, stores
    // the value there and returns the new stack pointer.
    uintptr_t Simulator::PushAddress(uintptr_t address)
    {
        const int lowered_sp = get_register(sp) - sizeof(uintptr_t);
        *reinterpret_cast<uintptr_t*>(lowered_sp) = address;
        set_register(sp, lowered_sp);
        return lowered_sp;
    }

    // Pops one slot off the simulated stack: reads the value sp points at,
    // raises sp by one slot and returns the value that was read.
    uintptr_t Simulator::PopAddress()
    {
        const int top_of_stack = get_register(sp);
        const uintptr_t popped = *reinterpret_cast<uintptr_t*>(top_of_stack);
        set_register(sp, top_of_stack + sizeof(uintptr_t));
        return popped;
    }

    // Creates a local monitor in the open-access state with no tagged address
    // and no transaction size, i.e. the same state that Clear() establishes.
    Simulator::LocalMonitor::LocalMonitor()
        : access_state_(MonitorAccess::Open),
          tagged_addr_(0),
          size_(TransactionSize::None)
    {
    }

    // Returns the local monitor to open access and forgets the tagged address
    // and transaction size.
    void Simulator::LocalMonitor::Clear()
    {
        size_ = TransactionSize::None;
        tagged_addr_ = 0;
        access_state_ = MonitorAccess::Open;
    }

    // Informs the local monitor of a non-exclusive load. |addr| is not
    // consulted: any load clears a monitor that is in exclusive access.
    void Simulator::LocalMonitor::NotifyLoad(int32_t addr)
    {
        if (access_state_ != MonitorAccess::Exclusive) {
            return;
        }
        // A load may evict a cache line, and an eviction affects the monitor.
        // Clearing unconditionally is therefore the strictest behavior.
        Clear();
    }

    // Informs the local monitor of an exclusive load: records |addr| and |size|
    // as the tagged transaction and switches to exclusive access.
    void Simulator::LocalMonitor::NotifyLoadExcl(int32_t addr,
        TransactionSize size)
    {
        tagged_addr_ = addr;
        size_ = size;
        access_state_ = MonitorAccess::Exclusive;
    }

    // Informs the local monitor of a non-exclusive store. |addr| is not
    // consulted: any store clears a monitor that is in exclusive access.
    void Simulator::LocalMonitor::NotifyStore(int32_t addr)
    {
        if (access_state_ != MonitorAccess::Exclusive) {
            return;
        }
        // ARM DDI 0406C.b, A3.4.1 leaves it implementation-defined whether a
        // non-exclusive store to an address covered by the local monitor moves
        // it to open access or keeps it in exclusive access.
        //
        // A store may, however, evict a cache line, and an eviction affects the
        // monitor. Clearing unconditionally is therefore the strictest behavior.
        Clear();
    }

    // Informs the local monitor of an exclusive store. Returns true only when
    // the monitor is in exclusive access and both |addr| and |size| equal the
    // tagged transaction. A monitor in exclusive access is cleared either way.
    bool Simulator::LocalMonitor::NotifyStoreExcl(int32_t addr,
        TransactionSize size)
    {
        if (access_state_ != MonitorAccess::Exclusive) {
            DCHECK(access_state_ == MonitorAccess::Open);
            return false;
        }

        // A processor is allowed to require an exact address match (A3.4.5), so
        // addr is compared unmasked.
        //
        // On a mismatch the store is reported as failed: it is
        // implementation-defined whether an exclusive store to a non-tagged
        // address updates memory, and behavior is unpredictable when the
        // transaction size differs from that of the exclusive load. See ARM DDI
        // 0406C.b, A3.4.5.
        const bool matches_tag = (addr == tagged_addr_) && (size_ == size);
        Clear();
        return matches_tag;
    }

    // Creates a processor entry in the open-access state with no tagged
    // address, no list neighbours and a zeroed failure counter.
    Simulator::GlobalMonitor::Processor::Processor()
        : access_state_(MonitorAccess::Open),
          tagged_addr_(0),
          next_(nullptr),
          prev_(nullptr),
          failure_counter_(0)
    {
    }

    // Returns this processor's monitor state to open access and forgets the
    // tagged address.
    void Simulator::GlobalMonitor::Processor::Clear_Locked()
    {
        tagged_addr_ = 0;
        access_state_ = MonitorAccess::Open;
    }

    // Records an exclusive load by this processor: tags |addr| and switches to
    // exclusive access.
    void Simulator::GlobalMonitor::Processor::NotifyLoadExcl_Locked(int32_t addr)
    {
        tagged_addr_ = addr;
        access_state_ = MonitorAccess::Exclusive;
    }

    // Informs this processor's monitor state of a non-exclusive store. Neither
    // |addr| nor |is_requesting_processor| is consulted: any store clears a
    // state that is in exclusive access.
    void Simulator::GlobalMonitor::Processor::NotifyStore_Locked(
        int32_t addr, bool is_requesting_processor)
    {
        if (access_state_ != MonitorAccess::Exclusive) {
            return;
        }
        // Per ARM DDI 0406C.b, A3.4.2, a non-exclusive store by the requesting
        // processor to an address covered by the global monitor may move the
        // state to open access or leave it in exclusive access
        // (implementation-defined), whereas for any other processor the state
        // always moves to open access.
        //
        // As with the local monitor, though, a store may have caused a cache
        // eviction, which can affect the monitor, so the state is conservatively
        // cleared in every case.
        Clear_Locked();
    }

    // Informs this processor's monitor state of an exclusive store to |addr|.
    //
    // For the requesting processor, returns true when the state is in exclusive
    // access, |addr| equals the tagged address and no artificial failure is
    // injected. For every other processor the result is always false; the call
    // only clears the state when the masked addresses coincide.
    bool Simulator::GlobalMonitor::Processor::NotifyStoreExcl_Locked(
        int32_t addr, bool is_requesting_processor)
    {
        if (access_state_ != MonitorAccess::Exclusive) {
            return false;
        }

        if (!is_requesting_processor) {
            // Another processor's exclusive store succeeded. Compare masked
            // addresses so that the implementation is more conservative (i.e.
            // the granularity of locking is as large as possible).
            const bool same_granule = (addr & kExclusiveTaggedAddrMask) == (tagged_addr_ & kExclusiveTaggedAddrMask);
            if (same_granule) {
                Clear_Locked();
            }
            return false;
        }

        // A processor is allowed to require an exact address match (A3.4.5), so
        // addr is compared unmasked.
        if (addr != tagged_addr_) {
            return false;
        }

        // The access state of the requesting processor after a successful
        // exclusive store is implementation-defined, but according to the ARM
        // DDI this has no effect on the subsequent operation of the global
        // monitor.
        Clear_Locked();

        // Inject an occasional strex failure to mimic hardware, which can fail
        // at random because of background cache evictions.
        if (failure_counter_++ < kMaxFailureCounter) {
            return true;
        }
        failure_counter_ = 0;
        return false;
    }

    // Registers an exclusive load of |addr| by |processor|: links the processor
    // into the monitor's list (a no-op when it is already linked) and records
    // the exclusive load on it.
    void Simulator::GlobalMonitor::NotifyLoadExcl_Locked(int32_t addr,
        Processor* processor)
    {
        PrependProcessor_Locked(processor);
        processor->NotifyLoadExcl_Locked(addr);
    }

    void Simulator::GlobalMonitor::NotifyStore_Locked(int32_t addr,
        Processor* processor)
    {
        // Notify each processor of the store operation.
        for (Processor* iter = head_; iter; iter = iter->next_) {
            bool is_requesting_processor = iter == processor;
            iter->NotifyStore_Locked(addr, is_requesting_processor);
        }
    }

    bool Simulator::GlobalMonitor::NotifyStoreExcl_Locked(int32_t addr,
        Processor* processor)
    {
        DCHECK(IsProcessorInLinkedList_Locked(processor));
        if (processor->NotifyStoreExcl_Locked(addr, true)) {
            // Notify the other processors that this StoreExcl succeeded.
            for (Processor* iter = head_; iter; iter = iter->next_) {
                if (iter != processor) {
                    iter->NotifyStoreExcl_Locked(addr, false);
                }
            }
            return true;
        } else {
            return false;
        }
    }

    // Reports whether |processor| is linked into the monitor's list: it is
    // either the head or has at least one neighbour pointer set.
    bool Simulator::GlobalMonitor::IsProcessorInLinkedList_Locked(
        Processor* processor) const
    {
        if (head_ == processor) {
            return true;
        }
        return processor->next_ != nullptr || processor->prev_ != nullptr;
    }

    // Inserts |processor| at the front of the monitor's list. Does nothing when
    // the processor is already linked.
    void Simulator::GlobalMonitor::PrependProcessor_Locked(Processor* processor)
    {
        if (!IsProcessorInLinkedList_Locked(processor)) {
            Processor* const previous_head = head_;
            if (previous_head != nullptr) {
                previous_head->prev_ = processor;
            }
            processor->prev_ = nullptr;
            processor->next_ = previous_head;
            head_ = processor;
        }
    }

    // Unlinks |processor| from the monitor's list while holding the monitor's
    // mutex. Does nothing when the processor is not linked.
    void Simulator::GlobalMonitor::RemoveProcessor(Processor* processor)
    {
        base::MutexGuard lock_guard(&mutex);
        if (!IsProcessorInLinkedList_Locked(processor)) {
            return;
        }

        Processor* const before = processor->prev_;
        Processor* const after = processor->next_;

        // Route the predecessor (or the list head, when there is none) past the
        // removed entry.
        if (before != nullptr) {
            before->next_ = after;
        } else {
            head_ = after;
        }
        // Route the successor, if any, back to the predecessor.
        if (after != nullptr) {
            after->prev_ = before;
        }

        // Leave the removed entry fully detached.
        processor->next_ = nullptr;
        processor->prev_ = nullptr;
    }

} // namespace internal
} // namespace v8

#endif // USE_SIMULATOR
